diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6eb007253c..ff261bad78 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.15.2" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbeb30b148..c17fdc169f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3ce5f8d004..d58c8454c5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,4 @@ +# This file is used to automatically assign reviewers to PRs +# For more information see: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners + * @openai/sdks-team diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c031d9a1d1..e1e21f3fae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,41 +1,81 @@ name: CI on: push: - branches: - - main - pull_request: - branches: - - main + branches-ignore: + - 'generated' + - 'codegen/**' + - 'integrated/**' + - 'stl-preview-head/**' + - 'stl-preview-base/**' jobs: lint: + timeout-minutes: 10 name: lint - runs-on: ubuntu-latest - if: github.repository == 'openai/openai-python' - + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Install dependencies - run: | - rye sync --all-features + run: rye sync --all-features + + - name: Run lints + run: ./scripts/lint + + test: + timeout-minutes: 10 + name: test + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + steps: + - uses: actions/checkout@v4 - - name: Run ruff + - name: Install Rye run: | - rye run check:ruff + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Bootstrap + run: ./scripts/bootstrap + + - name: Run tests + run: ./scripts/test - - name: Run type checking + examples: + timeout-minutes: 10 + name: examples + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.repository == 'openai/openai-python' + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye run: | - rye run typecheck + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + - name: Install dependencies + run: | + rye sync --all-features - - name: 
Ensure importable + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + rye run python examples/demo.py + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - rye run python -c 'import openai' + rye run python examples/async_demo.py diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index 7dbae006c0..b3e1c679d4 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -1,5 +1,7 @@ name: Create releases on: + schedule: + - cron: '0 5 * * *' # every day at 5am UTC push: branches: - main @@ -12,7 +14,7 @@ jobs: environment: publish steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: stainless-api/trigger-release-please@v1 id: release @@ -23,11 +25,11 @@ jobs: - name: Install Rye if: ${{ steps.release.outputs.releases_created }} run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI if: ${{ steps.release.outputs.releases_created }} diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 026ed29c22..32bd6929e2 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -8,17 +8,18 @@ jobs: publish: name: publish runs-on: ubuntu-latest + environment: publish steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.15.2 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI run: | diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 108aa5973a..e078964a6f 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -13,7 +13,7 @@ jobs: if: github.repository == 'openai/openai-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check release environment run: | diff --git a/.gitignore b/.gitignore index a4b2f8c0bd..70815df7f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.prism.log .vscode _dev @@ -12,3 +13,8 @@ dist .env .envrc codegen.log +Brewfile.lock.json + +.DS_Store + +examples/*.mp3 diff --git a/.inline-snapshot/external/.gitignore b/.inline-snapshot/external/.gitignore new file mode 100644 index 0000000000..45bef68be1 --- /dev/null +++ b/.inline-snapshot/external/.gitignore @@ -0,0 +1,2 @@ +# ignore all snapshots which are not refered in the source +*-new.* diff --git a/.inline-snapshot/external/173417d553406f034f643e5db3f8d591fb691ebac56f5ae39a22cc7d455c5353.bin b/.inline-snapshot/external/173417d553406f034f643e5db3f8d591fb691ebac56f5ae39a22cc7d455c5353.bin new file mode 100644 index 0000000000..49c6dce93f --- /dev/null +++ b/.inline-snapshot/external/173417d553406f034f643e5db3f8d591fb691ebac56f5ae39a22cc7d455c5353.bin @@ -0,0 +1,28 @@ +data: 
{"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":null,"refusal":""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":"I'm"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" sorry"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":","},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" I"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" can't"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" assist"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" with"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" that"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" request"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":"."},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw4IfQfCCrcuybFm41wJyxjbkz7","object":"chat.completion.chunk","created":1727346172,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":79,"completion_tokens":11,"total_tokens":90,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git 
a/.inline-snapshot/external/2018feb66ae13fcf5333d61b95849decc68d3f63bd38172889367e1afb1e04f7.bin b/.inline-snapshot/external/2018feb66ae13fcf5333d61b95849decc68d3f63bd38172889367e1afb1e04f7.bin new file mode 100644 index 0000000000..871970676f --- /dev/null +++ b/.inline-snapshot/external/2018feb66ae13fcf5333d61b95849decc68d3f63bd38172889367e1afb1e04f7.bin @@ -0,0 +1,22 @@ +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_4XzlGBLtUe9dy3GVNV4jhq7h","type":"function","function":{"name":"get_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"New"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" York"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" City"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + +data: {"id":"chatcmpl-ABfwERreu9s99xXsVuOWtIB2UOx62","object":"chat.completion.chunk","created":1727346182,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_143bb8492c","choices":[],"usage":{"prompt_tokens":44,"completion_tokens":16,"total_tokens":60,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git 
a/.inline-snapshot/external/4cc50a6135d254573a502310e6af1246f55edb6ad95fa24059f160996b68866d.bin b/.inline-snapshot/external/4cc50a6135d254573a502310e6af1246f55edb6ad95fa24059f160996b68866d.bin new file mode 100644 index 0000000000..c3392883be --- /dev/null +++ b/.inline-snapshot/external/4cc50a6135d254573a502310e6af1246f55edb6ad95fa24059f160996b68866d.bin @@ -0,0 +1,10 @@ +data: {"id":"chatcmpl-ABfw3Oqj8RD0z6aJiiX37oTjV2HFh","object":"chat.completion.chunk","created":1727346171,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw3Oqj8RD0z6aJiiX37oTjV2HFh","object":"chat.completion.chunk","created":1727346171,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"content":"{\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw3Oqj8RD0z6aJiiX37oTjV2HFh","object":"chat.completion.chunk","created":1727346171,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"length"}]} + +data: {"id":"chatcmpl-ABfw3Oqj8RD0z6aJiiX37oTjV2HFh","object":"chat.completion.chunk","created":1727346171,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[],"usage":{"prompt_tokens":79,"completion_tokens":1,"total_tokens":80,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/569c877e69429d4cbc1577d2cd6dd33878095c68badc6b6654a69769b391a1c1.bin b/.inline-snapshot/external/569c877e69429d4cbc1577d2cd6dd33878095c68badc6b6654a69769b391a1c1.bin new file mode 100644 index 0000000000..47dd73151c --- /dev/null +++ b/.inline-snapshot/external/569c877e69429d4cbc1577d2cd6dd33878095c68badc6b6654a69769b391a1c1.bin @@ -0,0 +1,30 @@ +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":null,"refusal":""},"logprobs":{"content":null,"refusal":[]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":"I'm"},"logprobs":{"content":null,"refusal":[{"token":"I'm","logprob":-0.0012038043,"bytes":[73,39,109],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" very"},"logprobs":{"content":null,"refusal":[{"token":" very","logprob":-0.8438816,"bytes":[32,118,101,114,121],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" sorry"},"logprobs":{"content":null,"refusal":[{"token":" sorry","logprob":-3.4121115e-6,"bytes":[32,115,111,114,114,121],"top_logprobs":[]}]},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":","},"logprobs":{"content":null,"refusal":[{"token":",","logprob":-0.000033809047,"bytes":[44],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" but"},"logprobs":{"content":null,"refusal":[{"token":" but","logprob":-0.038048144,"bytes":[32,98,117,116],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" I"},"logprobs":{"content":null,"refusal":[{"token":" I","logprob":-0.0016109125,"bytes":[32,73],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" can't"},"logprobs":{"content":null,"refusal":[{"token":" can't","logprob":-0.0073532974,"bytes":[32,99,97,110,39,116],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" assist"},"logprobs":{"content":null,"refusal":[{"token":" assist","logprob":-0.0020837625,"bytes":[32,97,115,115,105,115,116],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" with"},"logprobs":{"content":null,"refusal":[{"token":" with","logprob":-0.00318354,"bytes":[32,119,105,116,104],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":" that"},"logprobs":{"content":null,"refusal":[{"token":" that","logprob":-0.0017186158,"bytes":[32,116,104,97,116],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"refusal":"."},"logprobs":{"content":null,"refusal":[{"token":".","logprob":-0.57687104,"bytes":[46],"top_logprobs":[]}]},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw5GEVqPbLY576l46FZDQoNJ2KC","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":79,"completion_tokens":12,"total_tokens":91,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git 
a/.inline-snapshot/external/7e5ea4d12e7cc064399b6631415e65923f182256b6e6b752950a3aaa2ad2320a.bin b/.inline-snapshot/external/7e5ea4d12e7cc064399b6631415e65923f182256b6e6b752950a3aaa2ad2320a.bin new file mode 100644 index 0000000000..801db2adf2 --- /dev/null +++ b/.inline-snapshot/external/7e5ea4d12e7cc064399b6631415e65923f182256b6e6b752950a3aaa2ad2320a.bin @@ -0,0 +1,36 @@ +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"{\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"city"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"temperature"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"61"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":",\""},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"units"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"f"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\"}"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF","object":"chat.completion.chunk","created":1727346169,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":79,"completion_tokens":14,"total_tokens":93,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/83b060bae42eb41c4f1edbb7c1542b954b37d9dfd1910b964ddebc9677e6ae85.bin b/.inline-snapshot/external/83b060bae42eb41c4f1edbb7c1542b954b37d9dfd1910b964ddebc9677e6ae85.bin new file mode 100644 index 0000000000..e9f34b6334 --- /dev/null +++ b/.inline-snapshot/external/83b060bae42eb41c4f1edbb7c1542b954b37d9dfd1910b964ddebc9677e6ae85.bin @@ -0,0 +1,12 @@ +data: {"id":"chatcmpl-ABfw5EzoqmfXjnnsXY7Yd8OC6tb3c","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":{"content":[],"refusal":null},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5EzoqmfXjnnsXY7Yd8OC6tb3c","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Foo"},"logprobs":{"content":[{"token":"Foo","logprob":-0.0025094282,"bytes":[70,111,111],"top_logprobs":[]}],"refusal":null},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5EzoqmfXjnnsXY7Yd8OC6tb3c","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"!"},"logprobs":{"content":[{"token":"!","logprob":-0.26638845,"bytes":[33],"top_logprobs":[]}],"refusal":null},"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw5EzoqmfXjnnsXY7Yd8OC6tb3c","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: 
{"id":"chatcmpl-ABfw5EzoqmfXjnnsXY7Yd8OC6tb3c","object":"chat.completion.chunk","created":1727346173,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":2,"total_tokens":11,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/a247c49c5fcd492bfb7a02a3306ad615ed8d8f649888ebfddfbc3ee151f44d46.bin b/.inline-snapshot/external/a247c49c5fcd492bfb7a02a3306ad615ed8d8f649888ebfddfbc3ee151f44d46.bin new file mode 100644 index 0000000000..b44d334ac5 --- /dev/null +++ b/.inline-snapshot/external/a247c49c5fcd492bfb7a02a3306ad615ed8d8f649888ebfddfbc3ee151f44d46.bin @@ -0,0 +1,28 @@ +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_CTf1nWJLqSeRgDqaCG27xZ74","type":"function","function":{"name":"get_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" Francisco"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"state"}}]},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"CA"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + +data: {"id":"chatcmpl-ABfwCgi41eStOcARjZq97ohCEGBPO","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[],"usage":{"prompt_tokens":48,"completion_tokens":19,"total_tokens":67,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/a491adda08c3d4fde95f5b2ee3f60f7f745f1a56d82e62f58031cc2add502380.bin b/.inline-snapshot/external/a491adda08c3d4fde95f5b2ee3f60f7f745f1a56d82e62f58031cc2add502380.bin new file mode 100644 index 0000000000..160e65de49 --- /dev/null +++ b/.inline-snapshot/external/a491adda08c3d4fde95f5b2ee3f60f7f745f1a56d82e62f58031cc2add502380.bin @@ -0,0 +1,100 @@ +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"{\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"{\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"{\""},"logprobs":null,"finish_reason":null}]} + 
+data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"city"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"city"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"city"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"\",\""},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"\",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"\",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"temperature"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"temperature"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"temperature"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"65"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"61"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"59"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":",\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"units"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"units"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"units"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"\":\""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"f"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"f"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"f"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{"content":"\"}"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{"content":"\"}"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{"content":"\"}"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[{"index":2,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw2KKFuVXmEJgVwYfBvejMAdWtq","object":"chat.completion.chunk","created":1727346170,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_b40fb1c6fb","choices":[],"usage":{"prompt_tokens":79,"completion_tokens":42,"total_tokens":121,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/c6aa7e397b7123c3501f25df3a05d4daf7e8ad6d61ffa406ab5361fe36a8d5b1.bin b/.inline-snapshot/external/c6aa7e397b7123c3501f25df3a05d4daf7e8ad6d61ffa406ab5361fe36a8d5b1.bin new file mode 100644 index 0000000000..f20333fbef --- /dev/null +++ b/.inline-snapshot/external/c6aa7e397b7123c3501f25df3a05d4daf7e8ad6d61ffa406ab5361fe36a8d5b1.bin @@ -0,0 +1,36 @@ +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_c91SqDXlYFuETYv8mUHzz6pp","type":"function","function":{"name":"GetWeatherArgs","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Ed"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"inburgh"}}]},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"country"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"UK"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"units"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + +data: {"id":"chatcmpl-ABfw8AOXnoa2kzy11vVTSjuQhHCQr","object":"chat.completion.chunk","created":1727346176,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7568d46099","choices":[],"usage":{"prompt_tokens":76,"completion_tokens":24,"total_tokens":100,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/d615580118391ee13492193e3a8bb74642d23ac1ca13fe37cb6e889b66f759f6.bin b/.inline-snapshot/external/d615580118391ee13492193e3a8bb74642d23ac1ca13fe37cb6e889b66f759f6.bin new file mode 
100644 index 0000000000..aee8650c72 --- /dev/null +++ b/.inline-snapshot/external/d615580118391ee13492193e3a8bb74642d23ac1ca13fe37cb6e889b66f759f6.bin @@ -0,0 +1,362 @@ +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" {\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"location"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" CA"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"weather"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" {\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"temperature"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"18"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"condition"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Part"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"ly"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" Cloud"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"y"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"humidity"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"72"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"%\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"wind"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Speed"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"15"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" km"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"/h"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"wind"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Direction"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"NW"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\"\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" },\n"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"forecast"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" [\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" {\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"day"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Monday"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"high"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"20"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"low"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"14"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"condition"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Sunny"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\"\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" },\n"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" {\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"day"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Tuesday"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"high"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"19"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"low"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"15"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"condition"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Mostly"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" Cloud"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"y"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\"\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" },\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" {\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"day"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Wednesday"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"high"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"18"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"low"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"14"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"°C"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\",\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"condition"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\":"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" \""},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"Cloud"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"y"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"\"\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" }\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" ]\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" }\n"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfwCjPMi0ubw56UyMIIeNfJzyogq","object":"chat.completion.chunk","created":1727346180,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":19,"completion_tokens":177,"total_tokens":196,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/e2aad469b71d1d4894ff833ea147020a9d875eb7ce644a0ff355581690a4cbfd.bin b/.inline-snapshot/external/e2aad469b71d1d4894ff833ea147020a9d875eb7ce644a0ff355581690a4cbfd.bin new file mode 100644 index 0000000000..b68ca8a3d9 --- /dev/null +++ 
b/.inline-snapshot/external/e2aad469b71d1d4894ff833ea147020a9d875eb7ce644a0ff355581690a4cbfd.bin @@ -0,0 +1,68 @@ +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"I'm"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" unable"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" provide"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" real"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"-time"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" weather"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" updates"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" To"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" get"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" 
the"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" current"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" weather"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" in"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" San"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" Francisco"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" recommend"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" checking"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" a"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" reliable"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" weather"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" website"},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" or"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" a"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" weather"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":" app"},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: {"id":"chatcmpl-ABfw031mOJeYCSHe4yI2ZjOA6kMJL","object":"chat.completion.chunk","created":1727346168,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":14,"completion_tokens":30,"total_tokens":44,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.inline-snapshot/external/f82268f2fefd5cfbc7eeb59c297688be2f6ca0849a6e4f17851b517310841d9b.bin b/.inline-snapshot/external/f82268f2fefd5cfbc7eeb59c297688be2f6ca0849a6e4f17851b517310841d9b.bin new file mode 100644 index 0000000000..3b111d5e61 --- /dev/null +++ b/.inline-snapshot/external/f82268f2fefd5cfbc7eeb59c297688be2f6ca0849a6e4f17851b517310841d9b.bin @@ -0,0 +1,52 @@ +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_JMW1whyEaYG438VE1OIflxA2","type":"function","function":{"name":"GetWeatherArgs","arguments":""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"ci"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"ty\": "}}]},"logprobs":null,"finish_reason":null}]} + 
+data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"Edinb"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"urgh"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\", \"c"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"ountry"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\": \""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"GB\", "}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"units"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\": \""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c\"}"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"id":"call_DNYTawLBoN8fj3KN6qU9N1Ou","type":"function","function":{"name":"get_stock_price","arguments":""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"ti"}}]},"logprobs":null,"finish_reason":null}]} + +data: 
{"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"cker\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":": \"AAP"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"L\", "}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\"exch"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"ange\":"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":" \"NA"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"SDAQ\""}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{"tool_calls":[{"index":1,"function":{"arguments":"}"}}]},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + +data: {"id":"chatcmpl-ABfwAwrNePHUgBBezonVC6MX3zd63","object":"chat.completion.chunk","created":1727346178,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_5050236cbd","choices":[],"usage":{"prompt_tokens":149,"completion_tokens":60,"total_tokens":209,"completion_tokens_details":{"reasoning_tokens":0}}} + +data: [DONE] + diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 13787787c4..f15af035f8 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.3.5" + ".": "1.78.1" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 03b0268ffa..5f1bee851b 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1 +1,4 @@ -configured_endpoints: 57 +configured_endpoints: 101 +openapi_spec_url: 
https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-794a6ed3c3d3d77887564755168056af8a426b17cf1ec721e3a300503dc22a41.yml +openapi_spec_hash: 25a81c220713cd5b0bafc221d1dfa79a +config_hash: 0b768ed1b56c6d82816f0fa40dc4aaf5 diff --git a/Brewfile b/Brewfile new file mode 100644 index 0000000000..492ca37bb0 --- /dev/null +++ b/Brewfile @@ -0,0 +1,2 @@ +brew "rye" + diff --git a/CHANGELOG.md b/CHANGELOG.md index 0869b3888c..b153f3ef05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,2405 @@ # Changelog +## 1.78.1 (2025-05-12) + +Full Changelog: [v1.78.0...v1.78.1](https://github.com/openai/openai-python/compare/v1.78.0...v1.78.1) + +### Bug Fixes + +* **internal:** fix linting due to broken __test__ annotation ([5a7d7a0](https://github.com/openai/openai-python/commit/5a7d7a081138c6473bff44e60d439812ecb85cdf)) +* **package:** support direct resource imports ([2293fc0](https://github.com/openai/openai-python/commit/2293fc0dd23a9c756067cdc22b39c18448f35feb)) + +## 1.78.0 (2025-05-08) + +Full Changelog: [v1.77.0...v1.78.0](https://github.com/openai/openai-python/compare/v1.77.0...v1.78.0) + +### Features + +* **api:** Add reinforcement fine-tuning api support ([bebe361](https://github.com/openai/openai-python/commit/bebe36104bd3062d09ab9bbfb4bacfc99e737cb2)) + + +### Bug Fixes + +* ignore errors in isinstance() calls on LazyProxy subclasses ([#2343](https://github.com/openai/openai-python/issues/2343)) ([52cbbdf](https://github.com/openai/openai-python/commit/52cbbdf2207567741f16d18f1ea1b0d13d667375)), closes [#2056](https://github.com/openai/openai-python/issues/2056) + + +### Chores + +* **internal:** update proxy tests ([b8e848d](https://github.com/openai/openai-python/commit/b8e848d5fb58472cbfa27fb3ed01efc25a05d944)) +* use lazy imports for module level client ([4d0f409](https://github.com/openai/openai-python/commit/4d0f409e79a18cce9855fe076f5a50e52b8bafd8)) +* use lazy imports for resources ([834813c](https://github.com/openai/openai-python/commit/834813c5cb1a84effc34e5eabed760393e1de806)) + +## 1.77.0 (2025-05-02) + +Full Changelog: [v1.76.2...v1.77.0](https://github.com/openai/openai-python/compare/v1.76.2...v1.77.0) + +### Features + +* **api:** add image sizes, reasoning encryption ([473469a](https://github.com/openai/openai-python/commit/473469afa1a5f0a03f727bdcdadb9fd57872f9c5)) + + +### Bug Fixes + +* **parsing:** handle whitespace only strings ([#2007](https://github.com/openai/openai-python/issues/2007)) ([246bc5b](https://github.com/openai/openai-python/commit/246bc5b7559887840717667a0dad465caef66c3b)) + + +### Chores + +* only strip leading whitespace ([8467d66](https://github.com/openai/openai-python/commit/8467d666e0ddf1a9f81b8769a5c8a2fef1de20c1)) + +## 1.76.2 (2025-04-29) + +Full Changelog: [v1.76.1...v1.76.2](https://github.com/openai/openai-python/compare/v1.76.1...v1.76.2) + +### Chores + +* **api:** API spec cleanup ([0a4d3e2](https://github.com/openai/openai-python/commit/0a4d3e2b495d22dd42ce1773b870554c64f9b3b2)) + +## 1.76.1 (2025-04-29) + +Full Changelog: [v1.76.0...v1.76.1](https://github.com/openai/openai-python/compare/v1.76.0...v1.76.1) + +### Chores + +* broadly detect json family of content-type headers ([b4b1b08](https://github.com/openai/openai-python/commit/b4b1b086b512eecc0ada7fc1efa45eb506982f13)) +* **ci:** only use depot for staging repos ([35312d8](https://github.com/openai/openai-python/commit/35312d80e6bbc1a61d06ad253af9a713b5ef040c)) +* **ci:** run on more branches and use depot runners 
([a6a45d4](https://github.com/openai/openai-python/commit/a6a45d4af8a4d904b37573a9b223d56106b4887d)) + +## 1.76.0 (2025-04-23) + +Full Changelog: [v1.75.0...v1.76.0](https://github.com/openai/openai-python/compare/v1.75.0...v1.76.0) + +### Features + +* **api:** adding new image model support ([74d7692](https://github.com/openai/openai-python/commit/74d7692e94c9dca96db8793809d75631c22dbb87)) + + +### Bug Fixes + +* **pydantic v1:** more robust `ModelField.annotation` check ([#2163](https://github.com/openai/openai-python/issues/2163)) ([7351b12](https://github.com/openai/openai-python/commit/7351b12bc981f56632b92342d9ef26f6fb28d540)) +* **pydantic v1:** more robust ModelField.annotation check ([eba7856](https://github.com/openai/openai-python/commit/eba7856db55afb8cb44376a0248587549f7bc65f)) + + +### Chores + +* **ci:** add timeout thresholds for CI jobs ([0997211](https://github.com/openai/openai-python/commit/09972119df5dd4c7c8db137c721364787e22d4c6)) +* **internal:** fix list file params ([da2113c](https://github.com/openai/openai-python/commit/da2113c60b50b4438459325fcd38d55df3f63d8e)) +* **internal:** import reformatting ([b425fb9](https://github.com/openai/openai-python/commit/b425fb906f62550c3669b09b9d8575f3d4d8496b)) +* **internal:** minor formatting changes ([aed1d76](https://github.com/openai/openai-python/commit/aed1d767898324cf90328db329e04e89a77579c3)) +* **internal:** refactor retries to not use recursion ([8cb8cfa](https://github.com/openai/openai-python/commit/8cb8cfab48a4fed70a756ce50036e7e56e1f9f87)) +* **internal:** update models test ([870ad4e](https://github.com/openai/openai-python/commit/870ad4ed3a284d75f44b825503750129284c7906)) +* update completion parse signature ([a44016c](https://github.com/openai/openai-python/commit/a44016c64cdefe404e97592808ed3c25411ab27b)) + +## 1.75.0 (2025-04-16) + +Full Changelog: [v1.74.1...v1.75.0](https://github.com/openai/openai-python/compare/v1.74.1...v1.75.0) + +### Features + +* **api:** add o3 and o4-mini model IDs ([4bacbd5](https://github.com/openai/openai-python/commit/4bacbd5503137e266c127dc643ebae496cb4f158)) + +## 1.74.1 (2025-04-16) + +Full Changelog: [v1.74.0...v1.74.1](https://github.com/openai/openai-python/compare/v1.74.0...v1.74.1) + +### Chores + +* **internal:** base client updates ([06303b5](https://github.com/openai/openai-python/commit/06303b501f8c17040c495971a4ee79ae340f6f4a)) +* **internal:** bump pyright version ([9fd1c77](https://github.com/openai/openai-python/commit/9fd1c778c3231616bf1331cb1daa86fdfca4cb7f)) + +## 1.74.0 (2025-04-14) + +Full Changelog: [v1.73.0...v1.74.0](https://github.com/openai/openai-python/compare/v1.73.0...v1.74.0) + +### Features + +* **api:** adding gpt-4.1 family of model IDs ([d4dae55](https://github.com/openai/openai-python/commit/d4dae5553ff3a2879b9ab79a6423661b212421f9)) + + +### Bug Fixes + +* **chat:** skip azure async filter events ([#2255](https://github.com/openai/openai-python/issues/2255)) ([fd3a38b](https://github.com/openai/openai-python/commit/fd3a38b1ed30af0a9f3302c1cfc6be6b352e65de)) + + +### Chores + +* **client:** minor internal fixes ([6071ae5](https://github.com/openai/openai-python/commit/6071ae5e8b4faa465afc8d07370737e66901900a)) +* **internal:** update pyright settings ([c8f8beb](https://github.com/openai/openai-python/commit/c8f8bebf852380a224701bc36826291d6387c53d)) + +## 1.73.0 (2025-04-12) + +Full Changelog: [v1.72.0...v1.73.0](https://github.com/openai/openai-python/compare/v1.72.0...v1.73.0) + +### Features + +* **api:** manual updates 
([a3253dd](https://github.com/openai/openai-python/commit/a3253dd798c1eccd9810d4fc593e8c2a568bcf4f)) + + +### Bug Fixes + +* **perf:** optimize some hot paths ([f79d39f](https://github.com/openai/openai-python/commit/f79d39fbcaea8f366a9e48c06fb1696bab3e607d)) +* **perf:** skip traversing types for NotGiven values ([28d220d](https://github.com/openai/openai-python/commit/28d220de3b4a09d80450d0bcc9b347bbf68f81ec)) + + +### Chores + +* **internal:** expand CI branch coverage ([#2295](https://github.com/openai/openai-python/issues/2295)) ([0ae783b](https://github.com/openai/openai-python/commit/0ae783b99122975be521365de0b6d2bce46056c9)) +* **internal:** reduce CI branch coverage ([2fb7d42](https://github.com/openai/openai-python/commit/2fb7d425cda679a54aa3262090479fd747363bb4)) +* slight wording improvement in README ([#2291](https://github.com/openai/openai-python/issues/2291)) ([e020759](https://github.com/openai/openai-python/commit/e0207598d16a2a9cb3cb3a8e8e97fa9cfdccd5e8)) +* workaround build errors ([4e10c96](https://github.com/openai/openai-python/commit/4e10c96a483db28dedc2d8c2908765fb7317e049)) + +## 1.72.0 (2025-04-08) + +Full Changelog: [v1.71.0...v1.72.0](https://github.com/openai/openai-python/compare/v1.71.0...v1.72.0) + +### Features + +* **api:** Add evalapi to sdk ([#2287](https://github.com/openai/openai-python/issues/2287)) ([35262fc](https://github.com/openai/openai-python/commit/35262fcef6ccb7d1f75c9abdfdc68c3dcf87ef53)) + + +### Chores + +* **internal:** fix examples ([#2288](https://github.com/openai/openai-python/issues/2288)) ([39defd6](https://github.com/openai/openai-python/commit/39defd61e81ea0ec6b898be12e9fb7e621c0e532)) +* **internal:** skip broken test ([#2289](https://github.com/openai/openai-python/issues/2289)) ([e2c9bce](https://github.com/openai/openai-python/commit/e2c9bce1f59686ee053b495d06ea118b4a89e09e)) +* **internal:** slight transform perf improvement ([#2284](https://github.com/openai/openai-python/issues/2284)) ([746174f](https://github.com/openai/openai-python/commit/746174fae7a018ece5dab54fb0b5a15fcdd18f2f)) +* **tests:** improve enum examples ([#2286](https://github.com/openai/openai-python/issues/2286)) ([c9dd81c](https://github.com/openai/openai-python/commit/c9dd81ce0277e8b1f5db5e0a39c4c2bcd9004bcc)) + +## 1.71.0 (2025-04-07) + +Full Changelog: [v1.70.0...v1.71.0](https://github.com/openai/openai-python/compare/v1.70.0...v1.71.0) + +### Features + +* **api:** manual updates ([bf8b4b6](https://github.com/openai/openai-python/commit/bf8b4b69906bfaea622c9c644270e985d92e2df2)) +* **api:** manual updates ([3e37aa3](https://github.com/openai/openai-python/commit/3e37aa3e151d9738625a1daf75d6243d6fdbe8f2)) +* **api:** manual updates ([dba9b65](https://github.com/openai/openai-python/commit/dba9b656fa5955b6eba8f6910da836a34de8d59d)) +* **api:** manual updates ([f0c463b](https://github.com/openai/openai-python/commit/f0c463b47836666d091b5f616871f1b94646d346)) + + +### Chores + +* **deps:** allow websockets v15 ([#2281](https://github.com/openai/openai-python/issues/2281)) ([19c619e](https://github.com/openai/openai-python/commit/19c619ea95839129a86c19d5b60133e1ed9f2746)) +* **internal:** only run examples workflow in main repo ([#2282](https://github.com/openai/openai-python/issues/2282)) ([c3e0927](https://github.com/openai/openai-python/commit/c3e0927d3fbbb9f753ba12adfa682a4235ba530a)) +* **internal:** remove trailing character ([#2277](https://github.com/openai/openai-python/issues/2277)) 
([5a21a2d](https://github.com/openai/openai-python/commit/5a21a2d7994e39bb0c86271eeb807983a9ae874a)) +* Remove deprecated/unused remote spec feature ([23f76eb](https://github.com/openai/openai-python/commit/23f76eb0b9ddf12bcb04a6ad3f3ec5e956d2863f)) + +## 1.70.0 (2025-03-31) + +Full Changelog: [v1.69.0...v1.70.0](https://github.com/openai/openai-python/compare/v1.69.0...v1.70.0) + +### Features + +* **api:** add `get /responses/{response_id}/input_items` endpoint ([4c6a35d](https://github.com/openai/openai-python/commit/4c6a35dec65362a6a738c3387dae57bf8cbfcbb2)) + +## 1.69.0 (2025-03-27) + +Full Changelog: [v1.68.2...v1.69.0](https://github.com/openai/openai-python/compare/v1.68.2...v1.69.0) + +### Features + +* **api:** add `get /chat/completions` endpoint ([e6b8a42](https://github.com/openai/openai-python/commit/e6b8a42fc4286656cc86c2acd83692b170e77b68)) + + +### Bug Fixes + +* **audio:** correctly parse transcription stream events ([16a3a19](https://github.com/openai/openai-python/commit/16a3a195ff31f099fbe46043a12d2380c2c01f83)) + + +### Chores + +* add hash of OpenAPI spec/config inputs to .stats.yml ([515e1cd](https://github.com/openai/openai-python/commit/515e1cdd4a3109e5b29618df813656e17f22b52a)) +* **api:** updates to supported Voice IDs ([#2261](https://github.com/openai/openai-python/issues/2261)) ([64956f9](https://github.com/openai/openai-python/commit/64956f9d9889b04380c7f5eb926509d1efd523e6)) +* fix typos ([#2259](https://github.com/openai/openai-python/issues/2259)) ([6160de3](https://github.com/openai/openai-python/commit/6160de3e099f09c2d6ee5eeee4cbcc55b67a8f87)) + +## 1.68.2 (2025-03-21) + +Full Changelog: [v1.68.1...v1.68.2](https://github.com/openai/openai-python/compare/v1.68.1...v1.68.2) + +### Refactors + +* **package:** rename audio extra to voice_helpers ([2dd6cb8](https://github.com/openai/openai-python/commit/2dd6cb87489fe12c5e45128f44d985c3f49aba1d)) + +## 1.68.1 (2025-03-21) + +Full Changelog: [v1.68.0...v1.68.1](https://github.com/openai/openai-python/compare/v1.68.0...v1.68.1) + +### Bug Fixes + +* **client:** remove duplicate types ([#2235](https://github.com/openai/openai-python/issues/2235)) ([063f7d0](https://github.com/openai/openai-python/commit/063f7d0684c350ca9d766e2cb150233a22a623c8)) +* **helpers/audio:** remove duplicative module ([f253d04](https://github.com/openai/openai-python/commit/f253d0415145f2c4904ea2e7b389d31d94e45a54)) +* **package:** make sounddevice and numpy optional dependencies ([8b04453](https://github.com/openai/openai-python/commit/8b04453f0483736c13f0209a9f8f3618bc0e86c9)) + + +### Chores + +* **ci:** run workflows on next too ([67f89d4](https://github.com/openai/openai-python/commit/67f89d478aab780d1481c9bf6682c6633e431137)) + +## 1.68.0 (2025-03-20) + +Full Changelog: [v1.67.0...v1.68.0](https://github.com/openai/openai-python/compare/v1.67.0...v1.68.0) + +### Features + +* add audio helpers ([423655c](https://github.com/openai/openai-python/commit/423655ca9077cfd258f1e52f6eb386fc8307fa5f)) +* **api:** new models for TTS, STT, + new audio features for Realtime ([#2232](https://github.com/openai/openai-python/issues/2232)) ([ab5192d](https://github.com/openai/openai-python/commit/ab5192d0a7b417ade622ec94dd48f86beb90692c)) + +## 1.67.0 (2025-03-19) + +Full Changelog: [v1.66.5...v1.67.0](https://github.com/openai/openai-python/compare/v1.66.5...v1.67.0) + +### Features + +* **api:** o1-pro now available through the API ([#2228](https://github.com/openai/openai-python/issues/2228)) 
([40a19d8](https://github.com/openai/openai-python/commit/40a19d8592c1767d6318230fc93e37c360d1bcd1)) + +## 1.66.5 (2025-03-18) + +Full Changelog: [v1.66.4...v1.66.5](https://github.com/openai/openai-python/compare/v1.66.4...v1.66.5) + +### Bug Fixes + +* **types:** improve responses type names ([#2224](https://github.com/openai/openai-python/issues/2224)) ([5f7beb8](https://github.com/openai/openai-python/commit/5f7beb873af5ccef2551f34ab3ef098e099ce9c6)) + + +### Chores + +* **internal:** add back releases workflow ([c71d4c9](https://github.com/openai/openai-python/commit/c71d4c918eab3532b36ea944b0c4069db6ac2d38)) +* **internal:** codegen related update ([#2222](https://github.com/openai/openai-python/issues/2222)) ([f570d91](https://github.com/openai/openai-python/commit/f570d914a16cb5092533e32dfd863027d378c0b5)) + +## 1.66.4 (2025-03-17) + +Full Changelog: [v1.66.3...v1.66.4](https://github.com/openai/openai-python/compare/v1.66.3...v1.66.4) + +### Bug Fixes + +* **ci:** ensure pip is always available ([#2207](https://github.com/openai/openai-python/issues/2207)) ([3f08e56](https://github.com/openai/openai-python/commit/3f08e56a48a04c2b7f03a4ad63f38228e25810e6)) +* **ci:** remove publishing patch ([#2208](https://github.com/openai/openai-python/issues/2208)) ([dd2dab7](https://github.com/openai/openai-python/commit/dd2dab7faf2a003da3e6af66780bd250be6e7f3f)) +* **types:** handle more discriminated union shapes ([#2206](https://github.com/openai/openai-python/issues/2206)) ([f85a9c6](https://github.com/openai/openai-python/commit/f85a9c633dcb9b64c0eb47d20151894742bbef22)) + + +### Chores + +* **internal:** bump rye to 0.44.0 ([#2200](https://github.com/openai/openai-python/issues/2200)) ([2dd3139](https://github.com/openai/openai-python/commit/2dd3139df6e7fe6307f9847e6527073e355e5047)) +* **internal:** remove CI condition ([#2203](https://github.com/openai/openai-python/issues/2203)) ([9620fdc](https://github.com/openai/openai-python/commit/9620fdcf4f2d01b6753ecc0abc16e5239c2b41e1)) +* **internal:** remove extra empty newlines ([#2195](https://github.com/openai/openai-python/issues/2195)) ([a1016a7](https://github.com/openai/openai-python/commit/a1016a78fe551e0f0e2562a0e81d1cb724d195da)) +* **internal:** update release workflows ([e2def44](https://github.com/openai/openai-python/commit/e2def4453323aa1cf8077df447fd55eb4c626393)) + +## 1.66.3 (2025-03-12) + +Full Changelog: [v1.66.2...v1.66.3](https://github.com/openai/openai-python/compare/v1.66.2...v1.66.3) + +### Bug Fixes + +* update module level client ([#2185](https://github.com/openai/openai-python/issues/2185)) ([456f324](https://github.com/openai/openai-python/commit/456f3240a0c33e71521c6b73c32e8adc1b8cd3bc)) + +## 1.66.2 (2025-03-11) + +Full Changelog: [v1.66.1...v1.66.2](https://github.com/openai/openai-python/compare/v1.66.1...v1.66.2) + +### Bug Fixes + +* **responses:** correct reasoning output type ([#2181](https://github.com/openai/openai-python/issues/2181)) ([8cb1129](https://github.com/openai/openai-python/commit/8cb11299acc40c80061af275691cd09a2bf30c65)) + +## 1.66.1 (2025-03-11) + +Full Changelog: [v1.66.0...v1.66.1](https://github.com/openai/openai-python/compare/v1.66.0...v1.66.1) + +### Bug Fixes + +* **responses:** correct computer use enum value ([#2180](https://github.com/openai/openai-python/issues/2180)) ([48f4628](https://github.com/openai/openai-python/commit/48f4628c5fb18ddd7d71e8730184f3ac50c4ffea)) + + +### Chores + +* **internal:** temporary commit 
([afabec1](https://github.com/openai/openai-python/commit/afabec1b5b18b41ac870970d06e6c2f152ef7bbe)) + +## 1.66.0 (2025-03-11) + +Full Changelog: [v1.65.5...v1.66.0](https://github.com/openai/openai-python/compare/v1.65.5...v1.66.0) + +### Features + +* **api:** add /v1/responses and built-in tools ([854df97](https://github.com/openai/openai-python/commit/854df97884736244d46060fd3d5a92916826ec8f)) + + +### Chores + +* export more types ([#2176](https://github.com/openai/openai-python/issues/2176)) ([a730f0e](https://github.com/openai/openai-python/commit/a730f0efedd228f96a49467f17fb19b6a219246c)) + +## 1.65.5 (2025-03-09) + +Full Changelog: [v1.65.4...v1.65.5](https://github.com/openai/openai-python/compare/v1.65.4...v1.65.5) + +### Chores + +* move ChatModel type to shared ([#2167](https://github.com/openai/openai-python/issues/2167)) ([104f02a](https://github.com/openai/openai-python/commit/104f02af371076d5d2498e48ae14d2eacc7df8bd)) + +## 1.65.4 (2025-03-05) + +Full Changelog: [v1.65.3...v1.65.4](https://github.com/openai/openai-python/compare/v1.65.3...v1.65.4) + +### Bug Fixes + +* **api:** add missing file rank enum + more metadata ([#2164](https://github.com/openai/openai-python/issues/2164)) ([0387e48](https://github.com/openai/openai-python/commit/0387e48e0880e496eb74b60eec9ed76a3171f14d)) + +## 1.65.3 (2025-03-04) + +Full Changelog: [v1.65.2...v1.65.3](https://github.com/openai/openai-python/compare/v1.65.2...v1.65.3) + +### Chores + +* **internal:** remove unused http client options forwarding ([#2158](https://github.com/openai/openai-python/issues/2158)) ([76ec464](https://github.com/openai/openai-python/commit/76ec464cfe3db3fa59a766259d6d6ee5bb889f86)) +* **internal:** run example files in CI ([#2160](https://github.com/openai/openai-python/issues/2160)) ([9979345](https://github.com/openai/openai-python/commit/9979345038594440eec2f500c0c7cc5417cc7c08)) + +## 1.65.2 (2025-03-01) + +Full Changelog: [v1.65.1...v1.65.2](https://github.com/openai/openai-python/compare/v1.65.1...v1.65.2) + +### Bug Fixes + +* **azure:** azure_deployment use with realtime + non-deployment-based APIs ([#2154](https://github.com/openai/openai-python/issues/2154)) ([5846b55](https://github.com/openai/openai-python/commit/5846b552877f3d278689c521f9a26ce31167e1ea)) + + +### Chores + +* **docs:** update client docstring ([#2152](https://github.com/openai/openai-python/issues/2152)) ([0518c34](https://github.com/openai/openai-python/commit/0518c341ee0e19941c6b1d9d60e2552e1aa17f26)) + +## 1.65.1 (2025-02-27) + +Full Changelog: [v1.65.0...v1.65.1](https://github.com/openai/openai-python/compare/v1.65.0...v1.65.1) + +### Documentation + +* update URLs from stainlessapi.com to stainless.com ([#2150](https://github.com/openai/openai-python/issues/2150)) ([dee4298](https://github.com/openai/openai-python/commit/dee42986eff46dd23ba25b3e2a5bb7357aca39d9)) + +## 1.65.0 (2025-02-27) + +Full Changelog: [v1.64.0...v1.65.0](https://github.com/openai/openai-python/compare/v1.64.0...v1.65.0) + +### Features + +* **api:** add gpt-4.5-preview ([#2149](https://github.com/openai/openai-python/issues/2149)) ([4cee52e](https://github.com/openai/openai-python/commit/4cee52e8d191b0532f28d86446da79b43a58b907)) + + +### Chores + +* **internal:** properly set __pydantic_private__ ([#2144](https://github.com/openai/openai-python/issues/2144)) ([2b1bd16](https://github.com/openai/openai-python/commit/2b1bd1604a038ded67367742a0b1c9d92e29dfc8)) + +## 1.64.0 (2025-02-22) + +Full Changelog: 
[v1.63.2...v1.64.0](https://github.com/openai/openai-python/compare/v1.63.2...v1.64.0) + +### Features + +* **client:** allow passing `NotGiven` for body ([#2135](https://github.com/openai/openai-python/issues/2135)) ([4451f56](https://github.com/openai/openai-python/commit/4451f5677f9eaad9b8fee74f71c2e5fe6785c420)) + + +### Bug Fixes + +* **client:** mark some request bodies as optional ([4451f56](https://github.com/openai/openai-python/commit/4451f5677f9eaad9b8fee74f71c2e5fe6785c420)) + + +### Chores + +* **internal:** fix devcontainers setup ([#2137](https://github.com/openai/openai-python/issues/2137)) ([4d88402](https://github.com/openai/openai-python/commit/4d884020cbeb1ca6093dd5317e3e5812551f7a46)) + +## 1.63.2 (2025-02-17) + +Full Changelog: [v1.63.1...v1.63.2](https://github.com/openai/openai-python/compare/v1.63.1...v1.63.2) + +### Chores + +* **internal:** revert temporary commit ([#2121](https://github.com/openai/openai-python/issues/2121)) ([72458ab](https://github.com/openai/openai-python/commit/72458abeed3dd95db8aabed94a33bb12a916f8b7)) + +## 1.63.1 (2025-02-17) + +Full Changelog: [v1.63.0...v1.63.1](https://github.com/openai/openai-python/compare/v1.63.0...v1.63.1) + +### Chores + +* **internal:** temporary commit ([#2121](https://github.com/openai/openai-python/issues/2121)) ([f7f8361](https://github.com/openai/openai-python/commit/f7f83614c8da84c6725d60936f08f9f1a65f0a9e)) + +## 1.63.0 (2025-02-13) + +Full Changelog: [v1.62.0...v1.63.0](https://github.com/openai/openai-python/compare/v1.62.0...v1.63.0) + +### Features + +* **api:** add support for storing chat completions ([#2117](https://github.com/openai/openai-python/issues/2117)) ([2357a8f](https://github.com/openai/openai-python/commit/2357a8f97246a3fe17c6ac1fb0d7a67d6f1ffc1d)) + +## 1.62.0 (2025-02-12) + +Full Changelog: [v1.61.1...v1.62.0](https://github.com/openai/openai-python/compare/v1.61.1...v1.62.0) + +### Features + +* **client:** send `X-Stainless-Read-Timeout` header ([#2094](https://github.com/openai/openai-python/issues/2094)) ([0288213](https://github.com/openai/openai-python/commit/0288213fbfa935c9bf9d56416619ea929ae1cf63)) +* **embeddings:** use stdlib array type for improved performance ([#2060](https://github.com/openai/openai-python/issues/2060)) ([9a95db9](https://github.com/openai/openai-python/commit/9a95db9154ac98678970e7f1652a7cacfd2f7fdb)) +* **pagination:** avoid fetching when has_more: false ([#2098](https://github.com/openai/openai-python/issues/2098)) ([1882483](https://github.com/openai/openai-python/commit/18824832d3a676ae49206cd2b5e09d4796fdf033)) + + +### Bug Fixes + +* **api:** add missing reasoning effort + model enums ([#2096](https://github.com/openai/openai-python/issues/2096)) ([e0ca9f0](https://github.com/openai/openai-python/commit/e0ca9f0f6fae40230f8cab97573914ed632920b6)) +* **parsing:** don't default to an empty array ([#2106](https://github.com/openai/openai-python/issues/2106)) ([8e748bb](https://github.com/openai/openai-python/commit/8e748bb08d9c0d1f7e8a1af31452e25eb7154f55)) + + +### Chores + +* **internal:** fix type traversing dictionary params ([#2097](https://github.com/openai/openai-python/issues/2097)) ([4e5b368](https://github.com/openai/openai-python/commit/4e5b368bf576f38d0f125778edde74ed6d101d7d)) +* **internal:** minor type handling changes ([#2099](https://github.com/openai/openai-python/issues/2099)) ([a2c6da0](https://github.com/openai/openai-python/commit/a2c6da0fbc610ee80a2e044a0b20fc1cc2376962)) + +## 1.61.1 (2025-02-05) + +Full Changelog: 
[v1.61.0...v1.61.1](https://github.com/openai/openai-python/compare/v1.61.0...v1.61.1) + +### Bug Fixes + +* **api/types:** correct audio duration & role types ([#2091](https://github.com/openai/openai-python/issues/2091)) ([afcea48](https://github.com/openai/openai-python/commit/afcea4891ff85de165ccc2b5497ccf9a90520e9e)) +* **cli/chat:** only send params when set ([#2077](https://github.com/openai/openai-python/issues/2077)) ([688b223](https://github.com/openai/openai-python/commit/688b223d9a733d241d50e5d7df62f346592c537c)) + + +### Chores + +* **internal:** bummp ruff dependency ([#2080](https://github.com/openai/openai-python/issues/2080)) ([b7a80b1](https://github.com/openai/openai-python/commit/b7a80b1994ab86e81485b88531e4aea63b3da594)) +* **internal:** change default timeout to an int ([#2079](https://github.com/openai/openai-python/issues/2079)) ([d3df1c6](https://github.com/openai/openai-python/commit/d3df1c6ca090598701e38fd376a9796aadba88f1)) + +## 1.61.0 (2025-01-31) + +Full Changelog: [v1.60.2...v1.61.0](https://github.com/openai/openai-python/compare/v1.60.2...v1.61.0) + +### Features + +* **api:** add o3-mini ([#2067](https://github.com/openai/openai-python/issues/2067)) ([12b87a4](https://github.com/openai/openai-python/commit/12b87a4a1e6cb071a6b063d089585dec56a5d534)) + + +### Bug Fixes + +* **types:** correct metadata type + other fixes ([12b87a4](https://github.com/openai/openai-python/commit/12b87a4a1e6cb071a6b063d089585dec56a5d534)) + + +### Chores + +* **helpers:** section links ([ef8d3cc](https://github.com/openai/openai-python/commit/ef8d3cce40022d3482d341455be604e5f1afbd70)) +* **types:** fix Metadata types ([82d3156](https://github.com/openai/openai-python/commit/82d3156e74ed2f95edd10cd7ebea53d2b5562794)) +* update api.md ([#2063](https://github.com/openai/openai-python/issues/2063)) ([21964f0](https://github.com/openai/openai-python/commit/21964f00fb104011c4c357544114702052b74548)) + + +### Documentation + +* **readme:** current section links ([#2055](https://github.com/openai/openai-python/issues/2055)) ([ef8d3cc](https://github.com/openai/openai-python/commit/ef8d3cce40022d3482d341455be604e5f1afbd70)) + +## 1.60.2 (2025-01-27) + +Full Changelog: [v1.60.1...v1.60.2](https://github.com/openai/openai-python/compare/v1.60.1...v1.60.2) + +### Bug Fixes + +* **parsing:** don't validate input tools in the asynchronous `.parse()` method ([6fcfe73](https://github.com/openai/openai-python/commit/6fcfe73cd335853c7dd2cd3151a0d5d1785cfc9c)) + +## 1.60.1 (2025-01-24) + +Full Changelog: [v1.60.0...v1.60.1](https://github.com/openai/openai-python/compare/v1.60.0...v1.60.1) + +### Chores + +* **internal:** minor formatting changes ([#2050](https://github.com/openai/openai-python/issues/2050)) ([9c44192](https://github.com/openai/openai-python/commit/9c44192be5776d9252d36dc027a33c60b33d81b2)) + + +### Documentation + +* **examples/azure:** add async snippet ([#1787](https://github.com/openai/openai-python/issues/1787)) ([f60eda1](https://github.com/openai/openai-python/commit/f60eda1c1e8caf0ec2274b18b3fb2252304196db)) + +## 1.60.0 (2025-01-22) + +Full Changelog: [v1.59.9...v1.60.0](https://github.com/openai/openai-python/compare/v1.59.9...v1.60.0) + +### Features + +* **api:** update enum values, comments, and examples ([#2045](https://github.com/openai/openai-python/issues/2045)) ([e8205fd](https://github.com/openai/openai-python/commit/e8205fd58f0d677f476c577a8d9afb90f5710506)) + + +### Chores + +* **internal:** minor style changes 
([#2043](https://github.com/openai/openai-python/issues/2043)) ([89a9dd8](https://github.com/openai/openai-python/commit/89a9dd821eaf5300ad11b0270b61fdfa4fd6e9b6)) + + +### Documentation + +* **readme:** mention failed requests in request IDs ([5f7c30b](https://github.com/openai/openai-python/commit/5f7c30bc006ffb666c324011a68aae357cb33e35)) + +## 1.59.9 (2025-01-20) + +Full Changelog: [v1.59.8...v1.59.9](https://github.com/openai/openai-python/compare/v1.59.8...v1.59.9) + +### Bug Fixes + +* **tests:** make test_get_platform less flaky ([#2040](https://github.com/openai/openai-python/issues/2040)) ([72ea05c](https://github.com/openai/openai-python/commit/72ea05cf18caaa7a5e6fe7e2251ab93fa0ba3140)) + + +### Chores + +* **internal:** avoid pytest-asyncio deprecation warning ([#2041](https://github.com/openai/openai-python/issues/2041)) ([b901046](https://github.com/openai/openai-python/commit/b901046ddda9c79b7f019e2263c02d126a3b2ee2)) +* **internal:** update websockets dep ([#2036](https://github.com/openai/openai-python/issues/2036)) ([642cd11](https://github.com/openai/openai-python/commit/642cd119482c6fbca925ba702ad2579f9dc47bf9)) + + +### Documentation + +* fix typo ([#2031](https://github.com/openai/openai-python/issues/2031)) ([02fcf15](https://github.com/openai/openai-python/commit/02fcf15611953089826a74725cb96201d94658bb)) +* **raw responses:** fix duplicate `the` ([#2039](https://github.com/openai/openai-python/issues/2039)) ([9b8eab9](https://github.com/openai/openai-python/commit/9b8eab99fdc6a581a1f5cc421c6f74b0e2b30415)) + +## 1.59.8 (2025-01-17) + +Full Changelog: [v1.59.7...v1.59.8](https://github.com/openai/openai-python/compare/v1.59.7...v1.59.8) + +### Bug Fixes + +* streaming ([c16f58e](https://github.com/openai/openai-python/commit/c16f58ead0bc85055b164182689ba74b7e939dfa)) +* **structured outputs:** avoid parsing empty empty content ([#2023](https://github.com/openai/openai-python/issues/2023)) ([6d3513c](https://github.com/openai/openai-python/commit/6d3513c86f6e5800f8f73a45e089b7a205327121)) +* **structured outputs:** correct schema coercion for inline ref expansion ([#2025](https://github.com/openai/openai-python/issues/2025)) ([2f4f0b3](https://github.com/openai/openai-python/commit/2f4f0b374207f162060c328b71ec995049dc42e8)) +* **types:** correct type for vector store chunking strategy ([#2017](https://github.com/openai/openai-python/issues/2017)) ([e389279](https://github.com/openai/openai-python/commit/e38927950a5cdad99065853fe7b72aad6bb322e9)) + + +### Chores + +* **examples:** update realtime model ([f26746c](https://github.com/openai/openai-python/commit/f26746cbcd893d66cf8a3fd68a7c3779dc8c833c)), closes [#2020](https://github.com/openai/openai-python/issues/2020) +* **internal:** bump pyright dependency ([#2021](https://github.com/openai/openai-python/issues/2021)) ([0a9a0f5](https://github.com/openai/openai-python/commit/0a9a0f5d8b9d5457643798287f893305006dd518)) +* **internal:** streaming refactors ([#2012](https://github.com/openai/openai-python/issues/2012)) ([d76a748](https://github.com/openai/openai-python/commit/d76a748f606743407f94dfc26758095560e2082a)) +* **internal:** update deps ([#2015](https://github.com/openai/openai-python/issues/2015)) ([514e0e4](https://github.com/openai/openai-python/commit/514e0e415f87ab4510262d29ed6125384e017b84)) + + +### Documentation + +* **examples/azure:** example script with realtime API ([#1967](https://github.com/openai/openai-python/issues/1967)) 
([84f2f9c](https://github.com/openai/openai-python/commit/84f2f9c0439229a7db7136fe78419292d34d1f81)) + +## 1.59.7 (2025-01-13) + +Full Changelog: [v1.59.6...v1.59.7](https://github.com/openai/openai-python/compare/v1.59.6...v1.59.7) + +### Chores + +* export HttpxBinaryResponseContent class ([7191b71](https://github.com/openai/openai-python/commit/7191b71f3dcbbfcb2f2bec855c3bba93c956384e)) + +## 1.59.6 (2025-01-09) + +Full Changelog: [v1.59.5...v1.59.6](https://github.com/openai/openai-python/compare/v1.59.5...v1.59.6) + +### Bug Fixes + +* correctly handle deserialising `cls` fields ([#2002](https://github.com/openai/openai-python/issues/2002)) ([089c820](https://github.com/openai/openai-python/commit/089c820c8a5d20e9db6a171f0a4f11b481fe8465)) + + +### Chores + +* **internal:** spec update ([#2000](https://github.com/openai/openai-python/issues/2000)) ([36548f8](https://github.com/openai/openai-python/commit/36548f871763fdd7b5ce44903d186bc916331549)) + +## 1.59.5 (2025-01-08) + +Full Changelog: [v1.59.4...v1.59.5](https://github.com/openai/openai-python/compare/v1.59.4...v1.59.5) + +### Bug Fixes + +* **client:** only call .close() when needed ([#1992](https://github.com/openai/openai-python/issues/1992)) ([bdfd699](https://github.com/openai/openai-python/commit/bdfd699b99522e83f7610b5f98e36fe43ddf8338)) + + +### Documentation + +* fix typos ([#1995](https://github.com/openai/openai-python/issues/1995)) ([be694a0](https://github.com/openai/openai-python/commit/be694a097d6cf2668f08ecf94c882773b2ee1f84)) +* fix typos ([#1996](https://github.com/openai/openai-python/issues/1996)) ([714aed9](https://github.com/openai/openai-python/commit/714aed9d7eb74a19f6e502fb6d4fe83399f82851)) +* more typo fixes ([#1998](https://github.com/openai/openai-python/issues/1998)) ([7bd92f0](https://github.com/openai/openai-python/commit/7bd92f06a75f11f6afc2d1223d2426e186cc74cb)) +* **readme:** moved period to inside parentheses ([#1980](https://github.com/openai/openai-python/issues/1980)) ([e7fae94](https://github.com/openai/openai-python/commit/e7fae948f2ba8db23461e4374308417570196847)) + +## 1.59.4 (2025-01-07) + +Full Changelog: [v1.59.3...v1.59.4](https://github.com/openai/openai-python/compare/v1.59.3...v1.59.4) + +### Chores + +* add missing isclass check ([#1988](https://github.com/openai/openai-python/issues/1988)) ([61d9072](https://github.com/openai/openai-python/commit/61d9072fbace58d64910ec7378c3686ac555972e)) +* add missing isclass check for structured outputs ([bcbf013](https://github.com/openai/openai-python/commit/bcbf013e8d825b8b5f88172313dfb6e0313ca34c)) +* **internal:** bump httpx dependency ([#1990](https://github.com/openai/openai-python/issues/1990)) ([288c2c3](https://github.com/openai/openai-python/commit/288c2c30dc405cbaa89924f9243442300e95e100)) + + +### Documentation + +* **realtime:** fix event reference link ([9b6885d](https://github.com/openai/openai-python/commit/9b6885d50f8d65ba5009642046727d291e0f14fa)) + +## 1.59.3 (2025-01-03) + +Full Changelog: [v1.59.2...v1.59.3](https://github.com/openai/openai-python/compare/v1.59.2...v1.59.3) + +### Chores + +* **api:** bump spec version ([#1985](https://github.com/openai/openai-python/issues/1985)) ([c6f1b35](https://github.com/openai/openai-python/commit/c6f1b357fcf669065f4ed6819d47a528b0787128)) + +## 1.59.2 (2025-01-03) + +Full Changelog: [v1.59.1...v1.59.2](https://github.com/openai/openai-python/compare/v1.59.1...v1.59.2) + +### Chores + +* **ci:** fix publish workflow 
([0be1f5d](https://github.com/openai/openai-python/commit/0be1f5de0daf807cece564abf061c8bb188bb9aa)) +* **internal:** empty commit ([fe8dc2e](https://github.com/openai/openai-python/commit/fe8dc2e97fc430ea2433ed28cfaa79425af223ec)) + +## 1.59.1 (2025-01-02) + +Full Changelog: [v1.59.0...v1.59.1](https://github.com/openai/openai-python/compare/v1.59.0...v1.59.1) + +### Chores + +* bump license year ([#1981](https://github.com/openai/openai-python/issues/1981)) ([f29011a](https://github.com/openai/openai-python/commit/f29011a6426d3fa4844ecd723ee20561ee60c665)) + +## 1.59.0 (2024-12-21) + +Full Changelog: [v1.58.1...v1.59.0](https://github.com/openai/openai-python/compare/v1.58.1...v1.59.0) + +### Features + +* **azure:** support for the Realtime API ([#1963](https://github.com/openai/openai-python/issues/1963)) ([9fda141](https://github.com/openai/openai-python/commit/9fda14172abdb66fe240aa7b4dc7cfae4faf1d73)) + + +### Chores + +* **realtime:** update docstrings ([#1964](https://github.com/openai/openai-python/issues/1964)) ([3dee863](https://github.com/openai/openai-python/commit/3dee863554d28272103e90a6a199ac196e92ff05)) + +## 1.58.1 (2024-12-17) + +Full Changelog: [v1.58.0...v1.58.1](https://github.com/openai/openai-python/compare/v1.58.0...v1.58.1) + +### Documentation + +* **readme:** fix example script link ([23ba877](https://github.com/openai/openai-python/commit/23ba8778fd55e0f54f36685e9c5950b452d8e10c)) + +## 1.58.0 (2024-12-17) + +Full Changelog: [v1.57.4...v1.58.0](https://github.com/openai/openai-python/compare/v1.57.4...v1.58.0) + +### Features + +* add Realtime API support ([#1958](https://github.com/openai/openai-python/issues/1958)) ([97d73cf](https://github.com/openai/openai-python/commit/97d73cf89935ca6098bb889a92f0ec2cdff16989)) +* **api:** new o1 and GPT-4o models + preference fine-tuning ([#1956](https://github.com/openai/openai-python/issues/1956)) ([ec22ffb](https://github.com/openai/openai-python/commit/ec22ffb129c524525caa33b088405d27c271e631)) + + +### Bug Fixes + +* add reasoning_effort to all methods ([8829c32](https://github.com/openai/openai-python/commit/8829c3202dbe790ca3646476c802ec55ed47d864)) +* **assistants:** correctly send `include` query param ([9a4c69c](https://github.com/openai/openai-python/commit/9a4c69c383bc6719b6521a485f2c7e62a9c036a9)) +* **cli/migrate:** change grit binaries prefix ([#1951](https://github.com/openai/openai-python/issues/1951)) ([1c396c9](https://github.com/openai/openai-python/commit/1c396c95b040fb3d1a2523b09eaad4ff62d96846)) + + +### Chores + +* **internal:** fix some typos ([#1955](https://github.com/openai/openai-python/issues/1955)) ([628dead](https://github.com/openai/openai-python/commit/628dead660c00435bf46e09081c7b90b7bbe4a8a)) + + +### Documentation + +* add examples + guidance on Realtime API support ([1cb00f8](https://github.com/openai/openai-python/commit/1cb00f8fed78052aacbb9e0fac997b6ba0d44d2a)) +* **readme:** example snippet for client context manager ([#1953](https://github.com/openai/openai-python/issues/1953)) ([ad80255](https://github.com/openai/openai-python/commit/ad802551d8aaf4e6eff711118676ec4e64392638)) + +## 1.57.4 (2024-12-13) + +Full Changelog: [v1.57.3...v1.57.4](https://github.com/openai/openai-python/compare/v1.57.3...v1.57.4) + +### Chores + +* **internal:** remove some duplicated imports ([#1946](https://github.com/openai/openai-python/issues/1946)) ([f94fddd](https://github.com/openai/openai-python/commit/f94fddd377015764b3c82919fdf956f619447b77)) +* **internal:** updated imports 
([#1948](https://github.com/openai/openai-python/issues/1948)) ([13971fc](https://github.com/openai/openai-python/commit/13971fc450106746c0ae02ab931e68b770ee105e)) + +## 1.57.3 (2024-12-12) + +Full Changelog: [v1.57.2...v1.57.3](https://github.com/openai/openai-python/compare/v1.57.2...v1.57.3) + +### Chores + +* **internal:** add support for TypeAliasType ([#1942](https://github.com/openai/openai-python/issues/1942)) ([d3442ff](https://github.com/openai/openai-python/commit/d3442ff28f2394200e14122f683d1f94686e8231)) +* **internal:** bump pyright ([#1939](https://github.com/openai/openai-python/issues/1939)) ([190d1a8](https://github.com/openai/openai-python/commit/190d1a805dee7c37fb8f9dcb93b1715caa06cf95)) + +## 1.57.2 (2024-12-10) + +Full Changelog: [v1.57.1...v1.57.2](https://github.com/openai/openai-python/compare/v1.57.1...v1.57.2) + +### Bug Fixes + +* **azure:** handle trailing slash in `azure_endpoint` ([#1935](https://github.com/openai/openai-python/issues/1935)) ([69b73c5](https://github.com/openai/openai-python/commit/69b73c553b1982277c2f1b9d110ed951ddca689e)) + + +### Documentation + +* **readme:** fix http client proxies example ([#1932](https://github.com/openai/openai-python/issues/1932)) ([7a83e0f](https://github.com/openai/openai-python/commit/7a83e0fe4cc29e484ae417448b002c997745e4a3)) + +## 1.57.1 (2024-12-09) + +Full Changelog: [v1.57.0...v1.57.1](https://github.com/openai/openai-python/compare/v1.57.0...v1.57.1) + +### Chores + +* **internal:** bump pydantic dependency ([#1929](https://github.com/openai/openai-python/issues/1929)) ([5227c95](https://github.com/openai/openai-python/commit/5227c95eff9c7b1395e6d8f14b94652a91ed2ee2)) + +## 1.57.0 (2024-12-05) + +Full Changelog: [v1.56.2...v1.57.0](https://github.com/openai/openai-python/compare/v1.56.2...v1.57.0) + +### Features + +* **api:** updates ([#1924](https://github.com/openai/openai-python/issues/1924)) ([82ba614](https://github.com/openai/openai-python/commit/82ba6144682b0a6b3a22d4f764231c0c6afdcf6e)) + + +### Chores + +* bump openapi url ([#1922](https://github.com/openai/openai-python/issues/1922)) ([a472a8f](https://github.com/openai/openai-python/commit/a472a8fd0ba36b6897dcd02b6005fcf23f98f056)) + +## 1.56.2 (2024-12-04) + +Full Changelog: [v1.56.1...v1.56.2](https://github.com/openai/openai-python/compare/v1.56.1...v1.56.2) + +### Chores + +* make the `Omit` type public ([#1919](https://github.com/openai/openai-python/issues/1919)) ([4fb8a1c](https://github.com/openai/openai-python/commit/4fb8a1cf1f8df37ce8c027bbaaac85a648bae02a)) + +## 1.56.1 (2024-12-03) + +Full Changelog: [v1.56.0...v1.56.1](https://github.com/openai/openai-python/compare/v1.56.0...v1.56.1) + +### Bug Fixes + +* **cli:** remove usage of httpx proxies ([0e9fc3d](https://github.com/openai/openai-python/commit/0e9fc3dfbc7dec5b8c8f84dea9d87aad9f3d9cf6)) + + +### Chores + +* **internal:** bump pyright ([#1917](https://github.com/openai/openai-python/issues/1917)) ([0e87346](https://github.com/openai/openai-python/commit/0e8734637666ab22bc27fe4ec2cf7c39fddb5d08)) + +## 1.56.0 (2024-12-02) + +Full Changelog: [v1.55.3...v1.56.0](https://github.com/openai/openai-python/compare/v1.55.3...v1.56.0) + +### Features + +* **client:** make ChatCompletionStreamState public ([#1898](https://github.com/openai/openai-python/issues/1898)) ([dc7f6cb](https://github.com/openai/openai-python/commit/dc7f6cb2618686ff04bfdca228913cda3d320884)) + +## 1.55.3 (2024-11-28) + +Full Changelog: 
[v1.55.2...v1.55.3](https://github.com/openai/openai-python/compare/v1.55.2...v1.55.3) + +### Bug Fixes + +* **client:** compat with new httpx 0.28.0 release ([#1904](https://github.com/openai/openai-python/issues/1904)) ([72b6c63](https://github.com/openai/openai-python/commit/72b6c636c526885ef873580a07eff1c18e76bc10)) + +## 1.55.2 (2024-11-27) + +Full Changelog: [v1.55.1...v1.55.2](https://github.com/openai/openai-python/compare/v1.55.1...v1.55.2) + +### Chores + +* **internal:** exclude mypy from running on tests ([#1899](https://github.com/openai/openai-python/issues/1899)) ([e2496f1](https://github.com/openai/openai-python/commit/e2496f1d274126bdaa46a8256b3dd384b4ae244b)) + + +### Documentation + +* **assistants:** correct on_text_delta example ([#1896](https://github.com/openai/openai-python/issues/1896)) ([460b663](https://github.com/openai/openai-python/commit/460b663567ed1031467a8d69eb13fd3b3da38827)) + +## 1.55.1 (2024-11-25) + +Full Changelog: [v1.55.0...v1.55.1](https://github.com/openai/openai-python/compare/v1.55.0...v1.55.1) + +### Bug Fixes + +* **pydantic-v1:** avoid runtime error for assistants streaming ([#1885](https://github.com/openai/openai-python/issues/1885)) ([197c94b](https://github.com/openai/openai-python/commit/197c94b9e2620da8902aeed6959d2f871bb70461)) + + +### Chores + +* remove now unused `cached-property` dep ([#1867](https://github.com/openai/openai-python/issues/1867)) ([df5fac1](https://github.com/openai/openai-python/commit/df5fac1e557f79ed8d0935c48ca7f3f0bf77fa98)) +* remove now unused `cached-property` dep ([#1891](https://github.com/openai/openai-python/issues/1891)) ([feebaae](https://github.com/openai/openai-python/commit/feebaae85d76960cb8f1c58dd9b5180136c47962)) + + +### Documentation + +* add info log level to readme ([#1887](https://github.com/openai/openai-python/issues/1887)) ([358255d](https://github.com/openai/openai-python/commit/358255d15ed220f8c80a3c0861b98e61e909a7ae)) + +## 1.55.0 (2024-11-20) + +Full Changelog: [v1.54.5...v1.55.0](https://github.com/openai/openai-python/compare/v1.54.5...v1.55.0) + +### Features + +* **api:** add gpt-4o-2024-11-20 model ([#1877](https://github.com/openai/openai-python/issues/1877)) ([ff64c2a](https://github.com/openai/openai-python/commit/ff64c2a0733854ed8cc1d7dd959a8287b2ec8120)) + +## 1.54.5 (2024-11-19) + +Full Changelog: [v1.54.4...v1.54.5](https://github.com/openai/openai-python/compare/v1.54.4...v1.54.5) + +### Bug Fixes + +* **asyncify:** avoid hanging process under certain conditions ([#1853](https://github.com/openai/openai-python/issues/1853)) ([3d23437](https://github.com/openai/openai-python/commit/3d234377e7c9cd19db5186688612eb18e68cec8f)) + + +### Chores + +* **internal:** minor test changes ([#1874](https://github.com/openai/openai-python/issues/1874)) ([189339d](https://github.com/openai/openai-python/commit/189339d2a09d23ea1883286972f366e19b397f91)) +* **internal:** spec update ([#1873](https://github.com/openai/openai-python/issues/1873)) ([24c81f7](https://github.com/openai/openai-python/commit/24c81f729ae09ba3cec5542e5cc955c8b05b0f88)) +* **tests:** limit array example length ([#1870](https://github.com/openai/openai-python/issues/1870)) ([1e550df](https://github.com/openai/openai-python/commit/1e550df708fc3b5d903b7adfa2180058a216b676)) + +## 1.54.4 (2024-11-12) + +Full Changelog: [v1.54.3...v1.54.4](https://github.com/openai/openai-python/compare/v1.54.3...v1.54.4) + +### Bug Fixes + +* don't use dicts as iterables in transform 
([#1865](https://github.com/openai/openai-python/issues/1865)) ([76a51b1](https://github.com/openai/openai-python/commit/76a51b11efae50659a562197b1e18c6343964b56)) + + +### Documentation + +* bump models in example snippets to gpt-4o ([#1861](https://github.com/openai/openai-python/issues/1861)) ([adafe08](https://github.com/openai/openai-python/commit/adafe0859178d406fa93b38f3547f3d262651331)) +* move comments in example snippets ([#1860](https://github.com/openai/openai-python/issues/1860)) ([362cf74](https://github.com/openai/openai-python/commit/362cf74d6c34506f98f6c4fb2304357be21f7691)) +* **readme:** add missing asyncio import ([#1858](https://github.com/openai/openai-python/issues/1858)) ([dec9d0c](https://github.com/openai/openai-python/commit/dec9d0c97b702b6bcf9c71f5bdd6172bb5718354)) + +## 1.54.3 (2024-11-06) + +Full Changelog: [v1.54.2...v1.54.3](https://github.com/openai/openai-python/compare/v1.54.2...v1.54.3) + +### Bug Fixes + +* **logs:** redact sensitive headers ([#1850](https://github.com/openai/openai-python/issues/1850)) ([466608f](https://github.com/openai/openai-python/commit/466608fa56b7a9939c08a4c78be2f6fe4a05111b)) + +## 1.54.2 (2024-11-06) + +Full Changelog: [v1.54.1...v1.54.2](https://github.com/openai/openai-python/compare/v1.54.1...v1.54.2) + +### Chores + +* **tests:** adjust retry timeout values ([#1851](https://github.com/openai/openai-python/issues/1851)) ([cc8009c](https://github.com/openai/openai-python/commit/cc8009c9de56fe80f2689f69e7b891ff4ed297a3)) + +## 1.54.1 (2024-11-05) + +Full Changelog: [v1.54.0...v1.54.1](https://github.com/openai/openai-python/compare/v1.54.0...v1.54.1) + +### Bug Fixes + +* add new prediction param to all methods ([6aa424d](https://github.com/openai/openai-python/commit/6aa424d076098312801febd938bd4b5e8baf4851)) + +## 1.54.0 (2024-11-04) + +Full Changelog: [v1.53.1...v1.54.0](https://github.com/openai/openai-python/compare/v1.53.1...v1.54.0) + +### Features + +* **api:** add support for predicted outputs ([#1847](https://github.com/openai/openai-python/issues/1847)) ([42a4103](https://github.com/openai/openai-python/commit/42a410379a1b5f72424cc2e96dc6ddff22fd00be)) +* **project:** drop support for Python 3.7 ([#1845](https://github.com/openai/openai-python/issues/1845)) ([0ed5b1a](https://github.com/openai/openai-python/commit/0ed5b1a9302ccf2f40c3c751cd777740a4749cda)) + +## 1.53.1 (2024-11-04) + +Full Changelog: [v1.53.0...v1.53.1](https://github.com/openai/openai-python/compare/v1.53.0...v1.53.1) + +### Bug Fixes + +* don't use dicts as iterables in transform ([#1842](https://github.com/openai/openai-python/issues/1842)) ([258f265](https://github.com/openai/openai-python/commit/258f26535744ab3b2f0746991fd29eae72ebd667)) +* support json safe serialization for basemodel subclasses ([#1844](https://github.com/openai/openai-python/issues/1844)) ([2b80c90](https://github.com/openai/openai-python/commit/2b80c90c21d3b2468dfa3bf40c08c5b0e0eebffa)) + + +### Chores + +* **internal:** bump mypy ([#1839](https://github.com/openai/openai-python/issues/1839)) ([d92f959](https://github.com/openai/openai-python/commit/d92f959aa6f49be56574b4d1d1ac5ac48689dd46)) + +## 1.53.0 (2024-10-30) + +Full Changelog: [v1.52.2...v1.53.0](https://github.com/openai/openai-python/compare/v1.52.2...v1.53.0) + +### Features + +* **api:** add new, expressive voices for Realtime and Audio in Chat Completions ([7cf0a49](https://github.com/openai/openai-python/commit/7cf0a4958e4c985bef4d18bb919fa3948f389a82)) + + +### Chores + +* **internal:** bump pytest to v8 
& pydantic ([#1829](https://github.com/openai/openai-python/issues/1829)) ([0e67a8a](https://github.com/openai/openai-python/commit/0e67a8af5daf9da029d2bd4bdf341cc8a494254a)) + +## 1.52.2 (2024-10-23) + +Full Changelog: [v1.52.1...v1.52.2](https://github.com/openai/openai-python/compare/v1.52.1...v1.52.2) + +### Chores + +* **internal:** update spec version ([#1816](https://github.com/openai/openai-python/issues/1816)) ([c23282a](https://github.com/openai/openai-python/commit/c23282a328c48af90a88673ff5f6cc7a866f8758)) + +## 1.52.1 (2024-10-22) + +Full Changelog: [v1.52.0...v1.52.1](https://github.com/openai/openai-python/compare/v1.52.0...v1.52.1) + +### Bug Fixes + +* **client/async:** correctly retry in all cases ([#1803](https://github.com/openai/openai-python/issues/1803)) ([9fe3f3f](https://github.com/openai/openai-python/commit/9fe3f3f925e06769b7ef6abbf1314a5e82749b4a)) + + +### Chores + +* **internal:** bump ruff dependency ([#1801](https://github.com/openai/openai-python/issues/1801)) ([859c672](https://github.com/openai/openai-python/commit/859c6725865f1b3285698f68693f9491d511f7ea)) +* **internal:** remove unused black config ([#1807](https://github.com/openai/openai-python/issues/1807)) ([112dab0](https://github.com/openai/openai-python/commit/112dab0290342654265db612c37d327d652251bb)) +* **internal:** update spec version ([#1810](https://github.com/openai/openai-python/issues/1810)) ([aa25b7b](https://github.com/openai/openai-python/commit/aa25b7b88823836b418a63da59491f5f3842773c)) +* **internal:** update test syntax ([#1798](https://github.com/openai/openai-python/issues/1798)) ([d3098dd](https://github.com/openai/openai-python/commit/d3098dd0b9fbe627c21a8ad39c119d125b7cdb54)) +* **tests:** add more retry tests ([#1806](https://github.com/openai/openai-python/issues/1806)) ([5525a1b](https://github.com/openai/openai-python/commit/5525a1ba536058ecc13411e1f98e88f7ec4bf8b9)) + +## 1.52.0 (2024-10-17) + +Full Changelog: [v1.51.2...v1.52.0](https://github.com/openai/openai-python/compare/v1.51.2...v1.52.0) + +### Features + +* **api:** add gpt-4o-audio-preview model for chat completions ([#1796](https://github.com/openai/openai-python/issues/1796)) ([fbf1e0c](https://github.com/openai/openai-python/commit/fbf1e0c25c4d163f06b61a43d1a94ce001033a7b)) + +## 1.51.2 (2024-10-08) + +Full Changelog: [v1.51.1...v1.51.2](https://github.com/openai/openai-python/compare/v1.51.1...v1.51.2) + +### Chores + +* add repr to PageInfo class ([#1780](https://github.com/openai/openai-python/issues/1780)) ([63118ee](https://github.com/openai/openai-python/commit/63118ee3c2481d217682e8a31337bdcc16893127)) + +## 1.51.1 (2024-10-07) + +Full Changelog: [v1.51.0...v1.51.1](https://github.com/openai/openai-python/compare/v1.51.0...v1.51.1) + +### Bug Fixes + +* **client:** avoid OverflowError with very large retry counts ([#1779](https://github.com/openai/openai-python/issues/1779)) ([fb1dacf](https://github.com/openai/openai-python/commit/fb1dacfa4d9447d123c38ab3d3d433d900d32ec5)) + + +### Chores + +* **internal:** add support for parsing bool response content ([#1774](https://github.com/openai/openai-python/issues/1774)) ([aa2e25f](https://github.com/openai/openai-python/commit/aa2e25f9a4a632357051397ea34d269eafba026d)) + + +### Documentation + +* fix typo in fenced code block language ([#1769](https://github.com/openai/openai-python/issues/1769)) ([57bbc15](https://github.com/openai/openai-python/commit/57bbc155210cc439a36f4e5cbd082e94c3349d78)) +* improve and reference contributing documentation 
([#1767](https://github.com/openai/openai-python/issues/1767)) ([a985a8b](https://github.com/openai/openai-python/commit/a985a8b8ab8d0b364bd3c26b6423a7c49ae7b1ce)) + +## 1.51.0 (2024-10-01) + +Full Changelog: [v1.50.2...v1.51.0](https://github.com/openai/openai-python/compare/v1.50.2...v1.51.0) + +### Features + +* **api:** support storing chat completions, enabling evals and model distillation in the dashboard ([2840c6d](https://github.com/openai/openai-python/commit/2840c6df94afb44cfd80efabe0405898331ee267)) + + +### Chores + +* **docs:** fix maxium typo ([#1762](https://github.com/openai/openai-python/issues/1762)) ([de94553](https://github.com/openai/openai-python/commit/de94553f93d71fc6c8187c8d3fbe924a71cc46dd)) +* **internal:** remove ds store ([47a3968](https://github.com/openai/openai-python/commit/47a3968f9b318eb02d5602f5b10e7d9e69c3ae84)) + + +### Documentation + +* **helpers:** fix method name typo ([#1764](https://github.com/openai/openai-python/issues/1764)) ([e1bcfe8](https://github.com/openai/openai-python/commit/e1bcfe86554017ac63055060153c4fd72e65c0cf)) + +## 1.50.2 (2024-09-27) + +Full Changelog: [v1.50.1...v1.50.2](https://github.com/openai/openai-python/compare/v1.50.1...v1.50.2) + +### Bug Fixes + +* **audio:** correct types for transcriptions / translations ([#1755](https://github.com/openai/openai-python/issues/1755)) ([76c1f3f](https://github.com/openai/openai-python/commit/76c1f3f318b68003aae124c02efc4547a398a864)) + +## 1.50.1 (2024-09-27) + +Full Changelog: [v1.50.0...v1.50.1](https://github.com/openai/openai-python/compare/v1.50.0...v1.50.1) + +### Documentation + +* **helpers:** fix chat completion anchor ([#1753](https://github.com/openai/openai-python/issues/1753)) ([956d4e8](https://github.com/openai/openai-python/commit/956d4e8e32507fbce399f4619e06daa9d37a0532)) + +## 1.50.0 (2024-09-26) + +Full Changelog: [v1.49.0...v1.50.0](https://github.com/openai/openai-python/compare/v1.49.0...v1.50.0) + +### Features + +* **structured outputs:** add support for accessing raw responses ([#1748](https://github.com/openai/openai-python/issues/1748)) ([0189e28](https://github.com/openai/openai-python/commit/0189e28b0b062a28b16343da0460a4f0f4e17a9a)) + + +### Chores + +* **pydantic v1:** exclude specific properties when rich printing ([#1751](https://github.com/openai/openai-python/issues/1751)) ([af535ce](https://github.com/openai/openai-python/commit/af535ce6a523eca39438f117a3e55f16064567a9)) + +## 1.49.0 (2024-09-26) + +Full Changelog: [v1.48.0...v1.49.0](https://github.com/openai/openai-python/compare/v1.48.0...v1.49.0) + +### Features + +* **api:** add omni-moderation model ([#1750](https://github.com/openai/openai-python/issues/1750)) ([05b50da](https://github.com/openai/openai-python/commit/05b50da5428d5c7b5ea09626bcd88f8423762bf8)) + + +### Chores + +* **internal:** update test snapshots ([#1749](https://github.com/openai/openai-python/issues/1749)) ([42f054e](https://github.com/openai/openai-python/commit/42f054ee7afa8ce8316c2ecd90608a0f7e13bfdd)) + +## 1.48.0 (2024-09-25) + +Full Changelog: [v1.47.1...v1.48.0](https://github.com/openai/openai-python/compare/v1.47.1...v1.48.0) + +### Features + +* **client:** allow overriding retry count header ([#1745](https://github.com/openai/openai-python/issues/1745)) ([9f07d4d](https://github.com/openai/openai-python/commit/9f07d4dbd6f24108a1f5e0309037318858f5a229)) + + +### Bug Fixes + +* **audio:** correct response_format translations type ([#1743](https://github.com/openai/openai-python/issues/1743)) 
([b912108](https://github.com/openai/openai-python/commit/b9121089c696bc943323e2e75d4706401d809aaa)) + + +### Chores + +* **internal:** use `typing_extensions.overload` instead of `typing` ([#1740](https://github.com/openai/openai-python/issues/1740)) ([2522bd5](https://github.com/openai/openai-python/commit/2522bd59f7b5e903e4fc856a4c5dbdbe66bba37f)) + +## 1.47.1 (2024-09-23) + +Full Changelog: [v1.47.0...v1.47.1](https://github.com/openai/openai-python/compare/v1.47.0...v1.47.1) + +### Bug Fixes + +* **pydantic v1:** avoid warnings error ([1e8e7d1](https://github.com/openai/openai-python/commit/1e8e7d1f01a4ab4153085bc20484a19613d993b3)) + +## 1.47.0 (2024-09-20) + +Full Changelog: [v1.46.1...v1.47.0](https://github.com/openai/openai-python/compare/v1.46.1...v1.47.0) + +### Features + +* **client:** send retry count header ([21b0c00](https://github.com/openai/openai-python/commit/21b0c0043406d81971f87455e5a48b17935dc346)) + + +### Chores + +* **types:** improve type name for embedding models ([#1730](https://github.com/openai/openai-python/issues/1730)) ([4b4eb2b](https://github.com/openai/openai-python/commit/4b4eb2b37877728d2124ad5651ceebf615c0ab28)) + +## 1.46.1 (2024-09-19) + +Full Changelog: [v1.46.0...v1.46.1](https://github.com/openai/openai-python/compare/v1.46.0...v1.46.1) + +### Bug Fixes + +* **client:** handle domains with underscores ([#1726](https://github.com/openai/openai-python/issues/1726)) ([cd194df](https://github.com/openai/openai-python/commit/cd194dfdc418a84589bd903357cba349e9ad3e78)) + + +### Chores + +* **streaming:** silence pydantic model_dump warnings ([#1722](https://github.com/openai/openai-python/issues/1722)) ([30f84b9](https://github.com/openai/openai-python/commit/30f84b96081ac37f60e40a75d765dbbf563b61b3)) + +## 1.46.0 (2024-09-17) + +Full Changelog: [v1.45.1...v1.46.0](https://github.com/openai/openai-python/compare/v1.45.1...v1.46.0) + +### Features + +* **client:** add ._request_id property to object responses ([#1707](https://github.com/openai/openai-python/issues/1707)) ([8b3da05](https://github.com/openai/openai-python/commit/8b3da05a35b33245aec98693a0540ace6218a61b)) + + +### Documentation + +* **readme:** add examples for chat with image content ([#1703](https://github.com/openai/openai-python/issues/1703)) ([192b8f2](https://github.com/openai/openai-python/commit/192b8f2b6a49f462e48c1442858931875524ab49)) + +## 1.45.1 (2024-09-16) + +Full Changelog: [v1.45.0...v1.45.1](https://github.com/openai/openai-python/compare/v1.45.0...v1.45.1) + +### Chores + +* **internal:** bump pyright / mypy version ([#1717](https://github.com/openai/openai-python/issues/1717)) ([351af85](https://github.com/openai/openai-python/commit/351af85c5b813391910301a5049edddc8c9e70dd)) +* **internal:** bump ruff ([#1714](https://github.com/openai/openai-python/issues/1714)) ([aceaf64](https://github.com/openai/openai-python/commit/aceaf641eedd092ed42e4aaf031e8cfbf37e4212)) +* **internal:** update spec link ([#1716](https://github.com/openai/openai-python/issues/1716)) ([ca58c7f](https://github.com/openai/openai-python/commit/ca58c7f83a7cede0367dec2500127573c9b00d1f)) + + +### Documentation + +* update CONTRIBUTING.md ([#1710](https://github.com/openai/openai-python/issues/1710)) ([4d45eb5](https://github.com/openai/openai-python/commit/4d45eb5eb794bcc5076c022be09e06fae103abcc)) + +## 1.45.0 (2024-09-12) + +Full Changelog: [v1.44.1...v1.45.0](https://github.com/openai/openai-python/compare/v1.44.1...v1.45.0) + +### Features + +* **api:** add o1 models 
([#1708](https://github.com/openai/openai-python/issues/1708)) ([06bd42e](https://github.com/openai/openai-python/commit/06bd42e77121a6abd4826a79ce1848812d956576)) +* **errors:** include completion in LengthFinishReasonError ([#1701](https://github.com/openai/openai-python/issues/1701)) ([b0e3256](https://github.com/openai/openai-python/commit/b0e32562aff9aceafec994d3b047f7c2a9f11524)) + + +### Bug Fixes + +* **types:** correctly mark stream discriminator as optional ([#1706](https://github.com/openai/openai-python/issues/1706)) ([80f02f9](https://github.com/openai/openai-python/commit/80f02f9e5f83fac9cd2f4172b733a92ad01399b2)) + +## 1.44.1 (2024-09-09) + +Full Changelog: [v1.44.0...v1.44.1](https://github.com/openai/openai-python/compare/v1.44.0...v1.44.1) + +### Chores + +* add docstrings to raw response properties ([#1696](https://github.com/openai/openai-python/issues/1696)) ([1d2a19b](https://github.com/openai/openai-python/commit/1d2a19b0e8acab54c35ef2171d33321943488fdc)) + + +### Documentation + +* **readme:** add section on determining installed version ([#1697](https://github.com/openai/openai-python/issues/1697)) ([0255735](https://github.com/openai/openai-python/commit/0255735930d9c657c78e85e7f03fd1eb98a1e378)) +* **readme:** improve custom `base_url` example ([#1694](https://github.com/openai/openai-python/issues/1694)) ([05eec8a](https://github.com/openai/openai-python/commit/05eec8a0b7fcdc8651021f2e685214a353b861d1)) + +## 1.44.0 (2024-09-06) + +Full Changelog: [v1.43.1...v1.44.0](https://github.com/openai/openai-python/compare/v1.43.1...v1.44.0) + +### Features + +* **vector store:** improve chunking strategy type names ([#1690](https://github.com/openai/openai-python/issues/1690)) ([e82cd85](https://github.com/openai/openai-python/commit/e82cd85ac4962e36cb3b139c503069b56918688f)) + +## 1.43.1 (2024-09-05) + +Full Changelog: [v1.43.0...v1.43.1](https://github.com/openai/openai-python/compare/v1.43.0...v1.43.1) + +### Chores + +* pyproject.toml formatting changes ([#1687](https://github.com/openai/openai-python/issues/1687)) ([3387ede](https://github.com/openai/openai-python/commit/3387ede0b896788bf1197378b01941c75bd6e179)) + +## 1.43.0 (2024-08-29) + +Full Changelog: [v1.42.0...v1.43.0](https://github.com/openai/openai-python/compare/v1.42.0...v1.43.0) + +### Features + +* **api:** add file search result details to run steps ([#1681](https://github.com/openai/openai-python/issues/1681)) ([f5449c0](https://github.com/openai/openai-python/commit/f5449c07580ac9707f0387f86f4772fbf0a874b6)) + +## 1.42.0 (2024-08-20) + +Full Changelog: [v1.41.1...v1.42.0](https://github.com/openai/openai-python/compare/v1.41.1...v1.42.0) + +### Features + +* **parsing:** add support for pydantic dataclasses ([#1655](https://github.com/openai/openai-python/issues/1655)) ([101bee9](https://github.com/openai/openai-python/commit/101bee9844f725d2174796c3d09a58d3aa079fad)) + + +### Chores + +* **ci:** also run pydantic v1 tests ([#1666](https://github.com/openai/openai-python/issues/1666)) ([af2a1ca](https://github.com/openai/openai-python/commit/af2a1ca408a406098c6c79837aa3561b822e08ec)) + +## 1.41.1 (2024-08-19) + +Full Changelog: [v1.41.0...v1.41.1](https://github.com/openai/openai-python/compare/v1.41.0...v1.41.1) + +### Bug Fixes + +* **json schema:** remove `None` defaults ([#1663](https://github.com/openai/openai-python/issues/1663)) ([30215c1](https://github.com/openai/openai-python/commit/30215c15df613cf9c36cafd717af79158c9db3e5)) + + +### Chores + +* **client:** fix parsing union responses 
when non-json is returned ([#1665](https://github.com/openai/openai-python/issues/1665)) ([822c37d](https://github.com/openai/openai-python/commit/822c37de49eb2ffe8c05122f7520ba87bd76e30b)) + +## 1.41.0 (2024-08-16) + +Full Changelog: [v1.40.8...v1.41.0](https://github.com/openai/openai-python/compare/v1.40.8...v1.41.0) + +### Features + +* **client:** add uploads.upload_file helper ([aae079d](https://github.com/openai/openai-python/commit/aae079daa3c1763ab0e46bad766ae5261b475806)) + +## 1.40.8 (2024-08-15) + +Full Changelog: [v1.40.7...v1.40.8](https://github.com/openai/openai-python/compare/v1.40.7...v1.40.8) + +### Chores + +* **types:** define FilePurpose enum ([#1653](https://github.com/openai/openai-python/issues/1653)) ([3c2eeae](https://github.com/openai/openai-python/commit/3c2eeae32adf5d4ab6bc622be6f9a95a1a298dd3)) + +## 1.40.7 (2024-08-15) + +Full Changelog: [v1.40.6...v1.40.7](https://github.com/openai/openai-python/compare/v1.40.6...v1.40.7) + +### Bug Fixes + +* **cli/migrate:** change grit binaries download source ([#1649](https://github.com/openai/openai-python/issues/1649)) ([85e8935](https://github.com/openai/openai-python/commit/85e8935d9a123b92964d39a98334a975a06ab845)) + + +### Chores + +* **docs:** fix typo in example snippet ([4e83b57](https://github.com/openai/openai-python/commit/4e83b57ffbb64e1c98c19968557dc68a0b65d0b3)) +* **internal:** use different 32bit detection method ([#1652](https://github.com/openai/openai-python/issues/1652)) ([5831af6](https://github.com/openai/openai-python/commit/5831af65048af2a5df9e3ea4a48b8fff2e66dd8c)) + +## 1.40.6 (2024-08-12) + +Full Changelog: [v1.40.5...v1.40.6](https://github.com/openai/openai-python/compare/v1.40.5...v1.40.6) + +### Chores + +* **examples:** minor formatting changes ([#1644](https://github.com/openai/openai-python/issues/1644)) ([e08acf1](https://github.com/openai/openai-python/commit/e08acf1c6edd1501ed70c4634cd884ab1658af0d)) +* **internal:** update some imports ([#1642](https://github.com/openai/openai-python/issues/1642)) ([fce1ea7](https://github.com/openai/openai-python/commit/fce1ea72a89ba2737bc77775fe04f3a21ecb28e7)) +* sync openapi url ([#1646](https://github.com/openai/openai-python/issues/1646)) ([8ae3801](https://github.com/openai/openai-python/commit/8ae380123ada0bfaca9961e222a0e9c8b585e2d4)) +* **tests:** fix pydantic v1 tests ([2623630](https://github.com/openai/openai-python/commit/26236303f0f6de5df887e8ee3e41d5bc39a3abb1)) + +## 1.40.5 (2024-08-12) + +Full Changelog: [v1.40.4...v1.40.5](https://github.com/openai/openai-python/compare/v1.40.4...v1.40.5) + +### Documentation + +* **helpers:** make async client usage more clear ([34e1edf](https://github.com/openai/openai-python/commit/34e1edf29d6008df7196aaebc45172fa536c6410)), closes [#1639](https://github.com/openai/openai-python/issues/1639) + +## 1.40.4 (2024-08-12) + +Full Changelog: [v1.40.3...v1.40.4](https://github.com/openai/openai-python/compare/v1.40.3...v1.40.4) + +### Bug Fixes + +* **json schema:** unravel `$ref`s alongside additional keys ([c7a3d29](https://github.com/openai/openai-python/commit/c7a3d2986acaf3b31844b39608d03265ad87bb04)) +* **json schema:** unwrap `allOf`s with one entry ([53d964d](https://github.com/openai/openai-python/commit/53d964defebdf385d7d832ec7f13111b4af13c27)) + +## 1.40.3 (2024-08-10) + +Full Changelog: [v1.40.2...v1.40.3](https://github.com/openai/openai-python/compare/v1.40.2...v1.40.3) + +### Chores + +* **ci:** bump prism mock server version 
([#1630](https://github.com/openai/openai-python/issues/1630)) ([214d8fd](https://github.com/openai/openai-python/commit/214d8fd8d7d43c06c7dfe02680847a6a60988120)) +* **ci:** codeowners file ([#1627](https://github.com/openai/openai-python/issues/1627)) ([c059a20](https://github.com/openai/openai-python/commit/c059a20c8cd2124178641c9d8688e276b1cf1d59)) +* **internal:** ensure package is importable in lint cmd ([#1631](https://github.com/openai/openai-python/issues/1631)) ([779e6d0](https://github.com/openai/openai-python/commit/779e6d081eb55c158f2aa1962190079eb7f1335e)) + +## 1.40.2 (2024-08-08) + +Full Changelog: [v1.40.1...v1.40.2](https://github.com/openai/openai-python/compare/v1.40.1...v1.40.2) + +### Bug Fixes + +* **client:** raise helpful error message for response_format misuse ([18191da](https://github.com/openai/openai-python/commit/18191dac8e1437a0f708525d474b7ecfe459d966)) +* **json schema:** support recursive BaseModels in Pydantic v1 ([#1623](https://github.com/openai/openai-python/issues/1623)) ([43e10c0](https://github.com/openai/openai-python/commit/43e10c0f251a42f1e6497f360c6c23d3058b3da3)) + + +### Chores + +* **internal:** format some docstrings ([d34a081](https://github.com/openai/openai-python/commit/d34a081c30f869646145919b2256ded115241eb5)) +* **internal:** updates ([#1624](https://github.com/openai/openai-python/issues/1624)) ([598e7a2](https://github.com/openai/openai-python/commit/598e7a23768e7addbe1319ada2e87caee3cf0d14)) + +## 1.40.1 (2024-08-07) + +Full Changelog: [v1.40.0...v1.40.1](https://github.com/openai/openai-python/compare/v1.40.0...v1.40.1) + +### Chores + +* **internal:** update OpenAPI spec url ([#1608](https://github.com/openai/openai-python/issues/1608)) ([5392753](https://github.com/openai/openai-python/commit/53927531fc101e96b9e3f5d44f34b298055f496a)) +* **internal:** update test snapshots ([a11d1cb](https://github.com/openai/openai-python/commit/a11d1cb5d04aac0bf69dc10a3a21fa95575c0aa0)) + +## 1.40.0 (2024-08-06) + +Full Changelog: [v1.39.0...v1.40.0](https://github.com/openai/openai-python/compare/v1.39.0...v1.40.0) + +### Features + +* **api:** add structured outputs support ([e8dba7d](https://github.com/openai/openai-python/commit/e8dba7d0e08a7d0de5952be716e0efe9ae373759)) + + +### Chores + +* **internal:** bump ruff version ([#1604](https://github.com/openai/openai-python/issues/1604)) ([3e19a87](https://github.com/openai/openai-python/commit/3e19a87255d8e92716689656afaa3f16297773b6)) +* **internal:** update pydantic compat helper function ([#1607](https://github.com/openai/openai-python/issues/1607)) ([973c18b](https://github.com/openai/openai-python/commit/973c18b259a0e4a8134223f50a5f660b86650949)) + +## 1.39.0 (2024-08-05) + +Full Changelog: [v1.38.0...v1.39.0](https://github.com/openai/openai-python/compare/v1.38.0...v1.39.0) + +### Features + +* **client:** add `retries_taken` to raw response class ([#1601](https://github.com/openai/openai-python/issues/1601)) ([777822b](https://github.com/openai/openai-python/commit/777822b39b7f9ebd6272d0af8fc04f9d657bd886)) + + +### Bug Fixes + +* **assistants:** add parallel_tool_calls param to runs.stream ([113e82a](https://github.com/openai/openai-python/commit/113e82a82c7390660ad3324fa8f9842f83b27571)) + + +### Chores + +* **internal:** bump pyright ([#1599](https://github.com/openai/openai-python/issues/1599)) ([27f0f10](https://github.com/openai/openai-python/commit/27f0f107e39d16adc0d5a50ffe4c687e0e3c42e5)) +* **internal:** test updates 
([#1602](https://github.com/openai/openai-python/issues/1602)) ([af22d80](https://github.com/openai/openai-python/commit/af22d8079cf44cde5f03a206e78b900f8413dc43)) +* **internal:** use `TypeAlias` marker for type assignments ([#1597](https://github.com/openai/openai-python/issues/1597)) ([5907ea0](https://github.com/openai/openai-python/commit/5907ea04d6f5e0ffd17c38ad6a644a720ece8abe)) + +## 1.38.0 (2024-08-02) + +Full Changelog: [v1.37.2...v1.38.0](https://github.com/openai/openai-python/compare/v1.37.2...v1.38.0) + +### Features + +* extract out `ImageModel`, `AudioModel`, `SpeechModel` ([#1586](https://github.com/openai/openai-python/issues/1586)) ([b800316](https://github.com/openai/openai-python/commit/b800316aee6c8b2aeb609ca4c41972adccd2fa7a)) +* make enums not nominal ([#1588](https://github.com/openai/openai-python/issues/1588)) ([ab4519b](https://github.com/openai/openai-python/commit/ab4519bc45f5512c8c5165641c217385d999809c)) + +## 1.37.2 (2024-08-01) + +Full Changelog: [v1.37.1...v1.37.2](https://github.com/openai/openai-python/compare/v1.37.1...v1.37.2) + +### Chores + +* **internal:** add type construction helper ([#1584](https://github.com/openai/openai-python/issues/1584)) ([cbb186a](https://github.com/openai/openai-python/commit/cbb186a534b520fa5b11a9b371b175e3f6a6482b)) +* **runs/create_and_poll:** add parallel_tool_calls request param ([04b3e6c](https://github.com/openai/openai-python/commit/04b3e6c39ee5a7088e0e4dfa4c06f3dcce901a57)) + +## 1.37.1 (2024-07-25) + +Full Changelog: [v1.37.0...v1.37.1](https://github.com/openai/openai-python/compare/v1.37.0...v1.37.1) + +### Chores + +* **tests:** update prism version ([#1572](https://github.com/openai/openai-python/issues/1572)) ([af82593](https://github.com/openai/openai-python/commit/af8259393673af1ef6ec711da6297eb4ad55b66e)) + +## 1.37.0 (2024-07-22) + +Full Changelog: [v1.36.1...v1.37.0](https://github.com/openai/openai-python/compare/v1.36.1...v1.37.0) + +### Features + +* **api:** add uploads endpoints ([#1568](https://github.com/openai/openai-python/issues/1568)) ([d877b6d](https://github.com/openai/openai-python/commit/d877b6dabb9b3e8da6ff2f46de1120af54de398d)) + + +### Bug Fixes + +* **cli/audio:** handle non-json response format ([#1557](https://github.com/openai/openai-python/issues/1557)) ([bb7431f](https://github.com/openai/openai-python/commit/bb7431f602602d4c74d75809c6934a7fd192972d)) + + +### Documentation + +* **readme:** fix example snippet imports ([#1569](https://github.com/openai/openai-python/issues/1569)) ([0c90af6](https://github.com/openai/openai-python/commit/0c90af6412b3314c2257b9b8eb7fabd767f32ef6)) + +## 1.36.1 (2024-07-20) + +Full Changelog: [v1.36.0...v1.36.1](https://github.com/openai/openai-python/compare/v1.36.0...v1.36.1) + +### Bug Fixes + +* **types:** add gpt-4o-mini to more assistants methods ([39a8a37](https://github.com/openai/openai-python/commit/39a8a372eb3f2d75fd4310d42294d05175a59fd8)) + +## 1.36.0 (2024-07-19) + +Full Changelog: [v1.35.15...v1.36.0](https://github.com/openai/openai-python/compare/v1.35.15...v1.36.0) + +### Features + +* **api:** add new gpt-4o-mini models ([#1561](https://github.com/openai/openai-python/issues/1561)) ([5672ad4](https://github.com/openai/openai-python/commit/5672ad40aaa3498f6143baa48fc22bb1a3475bea)) + +## 1.35.15 (2024-07-18) + +Full Changelog: [v1.35.14...v1.35.15](https://github.com/openai/openai-python/compare/v1.35.14...v1.35.15) + +### Chores + +* **docs:** document how to do per-request http client customization 
([#1560](https://github.com/openai/openai-python/issues/1560)) ([24c0768](https://github.com/openai/openai-python/commit/24c076873c5cb2abe0d3e285b99aa110451b0f19)) +* **internal:** update formatting ([#1553](https://github.com/openai/openai-python/issues/1553)) ([e1389bc](https://github.com/openai/openai-python/commit/e1389bcc26f3aac63fc6bc9bb151c9a330d95b4e)) + +## 1.35.14 (2024-07-15) + +Full Changelog: [v1.35.13...v1.35.14](https://github.com/openai/openai-python/compare/v1.35.13...v1.35.14) + +### Chores + +* **docs:** minor update to formatting of API link in README ([#1550](https://github.com/openai/openai-python/issues/1550)) ([a6e59c6](https://github.com/openai/openai-python/commit/a6e59c6bbff9e1132aa323c0ecb3be7f0692ae42)) +* **internal:** minor formatting changes ([ee1c62e](https://github.com/openai/openai-python/commit/ee1c62ede01872e76156d886af4aab5f8eb1cc64)) +* **internal:** minor options / compat functions updates ([#1549](https://github.com/openai/openai-python/issues/1549)) ([a0701b5](https://github.com/openai/openai-python/commit/a0701b5dbeda4ac2d8a4b093aee4bdad9d674ee2)) + +## 1.35.13 (2024-07-10) + +Full Changelog: [v1.35.12...v1.35.13](https://github.com/openai/openai-python/compare/v1.35.12...v1.35.13) + +### Bug Fixes + +* **threads/runs/create_and_run_stream:** correct tool_resources param ([8effd08](https://github.com/openai/openai-python/commit/8effd08be3ab1cf509bdbfd9f174f9186fdbf71f)) + + +### Chores + +* **internal:** add helper function ([#1538](https://github.com/openai/openai-python/issues/1538)) ([81655a0](https://github.com/openai/openai-python/commit/81655a012e28c0240e71cf74b77ad1f9ac630906)) + +## 1.35.12 (2024-07-09) + +Full Changelog: [v1.35.11...v1.35.12](https://github.com/openai/openai-python/compare/v1.35.11...v1.35.12) + +### Bug Fixes + +* **azure:** refresh auth token during retries ([#1533](https://github.com/openai/openai-python/issues/1533)) ([287926e](https://github.com/openai/openai-python/commit/287926e4c0920b930af2b9d3d8b46a24e78e2979)) +* **tests:** fresh_env() now resets new environment values ([64da888](https://github.com/openai/openai-python/commit/64da888ca4d13f0b4b6d9e22ec93a897b2d6bb24)) + +## 1.35.11 (2024-07-09) + +Full Changelog: [v1.35.10...v1.35.11](https://github.com/openai/openai-python/compare/v1.35.10...v1.35.11) + +### Chores + +* **internal:** minor request options handling changes ([#1534](https://github.com/openai/openai-python/issues/1534)) ([8b0e493](https://github.com/openai/openai-python/commit/8b0e49302b3fcc32cf02393bf28354c577188904)) + +## 1.35.10 (2024-07-03) + +Full Changelog: [v1.35.9...v1.35.10](https://github.com/openai/openai-python/compare/v1.35.9...v1.35.10) + +### Chores + +* **ci:** update rye to v0.35.0 ([#1523](https://github.com/openai/openai-python/issues/1523)) ([dd118c4](https://github.com/openai/openai-python/commit/dd118c422019df00b153104b7bddf892c2ec7417)) + +## 1.35.9 (2024-07-02) + +Full Changelog: [v1.35.8...v1.35.9](https://github.com/openai/openai-python/compare/v1.35.8...v1.35.9) + +### Bug Fixes + +* **client:** always respect content-type multipart/form-data if provided ([#1519](https://github.com/openai/openai-python/issues/1519)) ([6da55e1](https://github.com/openai/openai-python/commit/6da55e10c4ba8c78687baedc68d5599ea120d05c)) + + +### Chores + +* minor change to tests ([#1521](https://github.com/openai/openai-python/issues/1521)) ([a679c0b](https://github.com/openai/openai-python/commit/a679c0bd1e041434440174daa7a64289746856d1)) + +## 1.35.8 (2024-07-02) + +Full Changelog: 
[v1.35.7...v1.35.8](https://github.com/openai/openai-python/compare/v1.35.7...v1.35.8) + +### Chores + +* gitignore test server logs ([#1509](https://github.com/openai/openai-python/issues/1509)) ([936d840](https://github.com/openai/openai-python/commit/936d84094a28ad0a2b4a20e2b3bbf1674048223e)) +* **internal:** add helper method for constructing `BaseModel`s ([#1517](https://github.com/openai/openai-python/issues/1517)) ([e5ddbf5](https://github.com/openai/openai-python/commit/e5ddbf554ce4b6be4b59114a36e69f02ca724acf)) +* **internal:** add reflection helper function ([#1508](https://github.com/openai/openai-python/issues/1508)) ([6044e1b](https://github.com/openai/openai-python/commit/6044e1bbfa9e46a01faf5a9edf198f86fa4c6dd0)) +* **internal:** add rich as a dev dependency ([#1514](https://github.com/openai/openai-python/issues/1514)) ([8a2b4e4](https://github.com/openai/openai-python/commit/8a2b4e4c1233dca916531ebc65d65a8d35fa7b7b)) + +## 1.35.7 (2024-06-27) + +Full Changelog: [v1.35.6...v1.35.7](https://github.com/openai/openai-python/compare/v1.35.6...v1.35.7) + +### Bug Fixes + +* **build:** include more files in sdist builds ([#1504](https://github.com/openai/openai-python/issues/1504)) ([730c1b5](https://github.com/openai/openai-python/commit/730c1b53b1a61e218a85aa2d1cf3ba4775618755)) + +## 1.35.6 (2024-06-27) + +Full Changelog: [v1.35.5...v1.35.6](https://github.com/openai/openai-python/compare/v1.35.5...v1.35.6) + +### Documentation + +* **readme:** improve some wording ([#1392](https://github.com/openai/openai-python/issues/1392)) ([a58a052](https://github.com/openai/openai-python/commit/a58a05215b560ebcf3ff3eb1dd997259720a48f3)) + +## 1.35.5 (2024-06-26) + +Full Changelog: [v1.35.4...v1.35.5](https://github.com/openai/openai-python/compare/v1.35.4...v1.35.5) + +### Bug Fixes + +* **cli/migrate:** avoid reliance on Python 3.12 argument ([be7a06b](https://github.com/openai/openai-python/commit/be7a06b3875e3ecb9229d67a41e290ca218f092d)) + +## 1.35.4 (2024-06-26) + +Full Changelog: [v1.35.3...v1.35.4](https://github.com/openai/openai-python/compare/v1.35.3...v1.35.4) + +### Bug Fixes + +* **docs:** fix link to advanced python httpx docs ([#1499](https://github.com/openai/openai-python/issues/1499)) ([cf45cd5](https://github.com/openai/openai-python/commit/cf45cd5942cecec569072146673ddfc0e0ec108e)) +* temporarily patch upstream version to fix broken release flow ([#1500](https://github.com/openai/openai-python/issues/1500)) ([4f10470](https://github.com/openai/openai-python/commit/4f10470f5f74fc258a78fa6d897d8ab5b70dcf52)) + + +### Chores + +* **doc:** clarify service tier default value ([#1496](https://github.com/openai/openai-python/issues/1496)) ([ba39667](https://github.com/openai/openai-python/commit/ba39667c4faa8e10457347be41334ca9639186d4)) + +## 1.35.3 (2024-06-20) + +Full Changelog: [v1.35.2...v1.35.3](https://github.com/openai/openai-python/compare/v1.35.2...v1.35.3) + +### Bug Fixes + +* **tests:** add explicit type annotation ([9345f10](https://github.com/openai/openai-python/commit/9345f104889056b2ef6646d65375925a0a3bae03)) + +## 1.35.2 (2024-06-20) + +Full Changelog: [v1.35.1...v1.35.2](https://github.com/openai/openai-python/compare/v1.35.1...v1.35.2) + +### Bug Fixes + +* **api:** add missing parallel_tool_calls arguments ([4041e4f](https://github.com/openai/openai-python/commit/4041e4f6ea1e2316179a82031001308be23a2524)) + +## 1.35.1 (2024-06-19) + +Full Changelog: [v1.35.0...v1.35.1](https://github.com/openai/openai-python/compare/v1.35.0...v1.35.1) + +### Bug Fixes + 
+* **client/async:** avoid blocking io call for platform headers ([#1488](https://github.com/openai/openai-python/issues/1488)) ([ae64c05](https://github.com/openai/openai-python/commit/ae64c05cbae76a58b592d913bee6ac1ef9611d4c)) + +## 1.35.0 (2024-06-18) + +Full Changelog: [v1.34.0...v1.35.0](https://github.com/openai/openai-python/compare/v1.34.0...v1.35.0) + +### Features + +* **api:** add service tier argument for chat completions ([#1486](https://github.com/openai/openai-python/issues/1486)) ([b4b4e66](https://github.com/openai/openai-python/commit/b4b4e660b8bb7ae937787fcab9b40feaeba7f711)) + +## 1.34.0 (2024-06-12) + +Full Changelog: [v1.33.0...v1.34.0](https://github.com/openai/openai-python/compare/v1.33.0...v1.34.0) + +### Features + +* **api:** updates ([#1481](https://github.com/openai/openai-python/issues/1481)) ([b83db36](https://github.com/openai/openai-python/commit/b83db362f0c9a5a4d55588b954fb1df1a68c98e3)) + +## 1.33.0 (2024-06-07) + +Full Changelog: [v1.32.1...v1.33.0](https://github.com/openai/openai-python/compare/v1.32.1...v1.33.0) + +### Features + +* **api:** adding chunking_strategy to polling helpers ([#1478](https://github.com/openai/openai-python/issues/1478)) ([83be2a1](https://github.com/openai/openai-python/commit/83be2a13e0384d3de52190d86ccb1b5d7a197d84)) + +## 1.32.1 (2024-06-07) + +Full Changelog: [v1.32.0...v1.32.1](https://github.com/openai/openai-python/compare/v1.32.0...v1.32.1) + +### Bug Fixes + +* remove erroneous thread create argument ([#1476](https://github.com/openai/openai-python/issues/1476)) ([43175c4](https://github.com/openai/openai-python/commit/43175c40e607d626a77a151691778c35a0e60eec)) + +## 1.32.0 (2024-06-06) + +Full Changelog: [v1.31.2...v1.32.0](https://github.com/openai/openai-python/compare/v1.31.2...v1.32.0) + +### Features + +* **api:** updates ([#1474](https://github.com/openai/openai-python/issues/1474)) ([87ddff0](https://github.com/openai/openai-python/commit/87ddff0e6e64650691a8e32f7477b7a00e06ed23)) + +## 1.31.2 (2024-06-06) + +Full Changelog: [v1.31.1...v1.31.2](https://github.com/openai/openai-python/compare/v1.31.1...v1.31.2) + +### Chores + +* **internal:** minor refactor of tests ([#1471](https://github.com/openai/openai-python/issues/1471)) ([b7f2298](https://github.com/openai/openai-python/commit/b7f229866f249d16e995db361b923bb4c0b7f1d4)) + +## 1.31.1 (2024-06-05) + +Full Changelog: [v1.31.0...v1.31.1](https://github.com/openai/openai-python/compare/v1.31.0...v1.31.1) + +### Chores + +* **internal:** minor change to tests ([#1466](https://github.com/openai/openai-python/issues/1466)) ([cb33e71](https://github.com/openai/openai-python/commit/cb33e7152f25fb16cf4c39a6e4714169c62d6af8)) + +## 1.31.0 (2024-06-03) + +Full Changelog: [v1.30.5...v1.31.0](https://github.com/openai/openai-python/compare/v1.30.5...v1.31.0) + +### Features + +* **api:** updates ([#1461](https://github.com/openai/openai-python/issues/1461)) ([0d7cc5e](https://github.com/openai/openai-python/commit/0d7cc5e48c565fe10ee6e8ca4d050175eb543bcb)) + + +### Chores + +* fix lint ([1886dd4](https://github.com/openai/openai-python/commit/1886dd4c98d7a7b3a679bff739cb38badf5ae96c)) + +## 1.30.5 (2024-05-29) + +Full Changelog: [v1.30.4...v1.30.5](https://github.com/openai/openai-python/compare/v1.30.4...v1.30.5) + +### Chores + +* **internal:** fix lint issue ([35a1e80](https://github.com/openai/openai-python/commit/35a1e806891c34d5cc13ac8341751e5b15b52319)) + +## 1.30.4 (2024-05-28) + +Full Changelog: 
[v1.30.3...v1.30.4](https://github.com/openai/openai-python/compare/v1.30.3...v1.30.4) + +### Chores + +* add missing __all__ definitions ([7fba60f](https://github.com/openai/openai-python/commit/7fba60f2e8adc26e83080aaf3e436eb9891e1253)) +* **internal:** fix lint issue ([f423cd0](https://github.com/openai/openai-python/commit/f423cd05d33b3e734eda7c0c008faac14ae96bb7)) + +## 1.30.3 (2024-05-24) + +Full Changelog: [v1.30.2...v1.30.3](https://github.com/openai/openai-python/compare/v1.30.2...v1.30.3) + +### Chores + +* **ci:** update rye install location ([#1440](https://github.com/openai/openai-python/issues/1440)) ([8a0e5bf](https://github.com/openai/openai-python/commit/8a0e5bf4c03d9c714799fad43be68ac9c2b1f37a)) +* **internal:** bump pyright ([#1442](https://github.com/openai/openai-python/issues/1442)) ([64a151e](https://github.com/openai/openai-python/commit/64a151eae705d55484f870df461434c0a6961e2b)) +* **internal:** fix lint issue ([#1444](https://github.com/openai/openai-python/issues/1444)) ([b0eb458](https://github.com/openai/openai-python/commit/b0eb4582e050b0a25af3d80d2cb584bfc7cd11ab)) + + +### Documentation + +* **contributing:** update references to rye-up.com ([dcc34a2](https://github.com/openai/openai-python/commit/dcc34a26d1a6a0debf440724fad658c77547048c)) + +## 1.30.2 (2024-05-23) + +Full Changelog: [v1.30.1...v1.30.2](https://github.com/openai/openai-python/compare/v1.30.1...v1.30.2) + +### Chores + +* **ci:** update rye install location ([#1436](https://github.com/openai/openai-python/issues/1436)) ([f7cc4e7](https://github.com/openai/openai-python/commit/f7cc4e7d5d0964a4a5d53e602379770c2576e1aa)) + +## 1.30.1 (2024-05-14) + +Full Changelog: [v1.30.0...v1.30.1](https://github.com/openai/openai-python/compare/v1.30.0...v1.30.1) + +### Chores + +* **internal:** add slightly better logging to scripts ([#1422](https://github.com/openai/openai-python/issues/1422)) ([43dffab](https://github.com/openai/openai-python/commit/43dffabb3bed4edf8a6e523cbb289f733a5f9b24)) + +## 1.30.0 (2024-05-14) + +Full Changelog: [v1.29.0...v1.30.0](https://github.com/openai/openai-python/compare/v1.29.0...v1.30.0) + +### Features + +* **api:** add incomplete state ([#1420](https://github.com/openai/openai-python/issues/1420)) ([6484984](https://github.com/openai/openai-python/commit/648498412d1c7740e6b67ed4d0a55b89ff29d3b1)) + +## 1.29.0 (2024-05-13) + +Full Changelog: [v1.28.2...v1.29.0](https://github.com/openai/openai-python/compare/v1.28.2...v1.29.0) + +### Features + +* **api:** add gpt-4o model ([#1417](https://github.com/openai/openai-python/issues/1417)) ([4f09f8c](https://github.com/openai/openai-python/commit/4f09f8c6cc4450f5e61f158f1bd54c513063a1a8)) + +## 1.28.2 (2024-05-13) + +Full Changelog: [v1.28.1...v1.28.2](https://github.com/openai/openai-python/compare/v1.28.1...v1.28.2) + +### Bug Fixes + +* **client:** accidental blocking sleep in async code ([#1415](https://github.com/openai/openai-python/issues/1415)) ([0ac6ecb](https://github.com/openai/openai-python/commit/0ac6ecb8d4e52f895bc3ae1f589f22ddaaef6204)) + + +### Chores + +* **internal:** bump pydantic dependency ([#1413](https://github.com/openai/openai-python/issues/1413)) ([ed73d1d](https://github.com/openai/openai-python/commit/ed73d1db540714e29a1ba30e3aa6429aae8b1dd8)) + +## 1.28.1 (2024-05-11) + +Full Changelog: [v1.28.0...v1.28.1](https://github.com/openai/openai-python/compare/v1.28.0...v1.28.1) + +### Chores + +* **docs:** add SECURITY.md ([#1408](https://github.com/openai/openai-python/issues/1408)) 
([119970a](https://github.com/openai/openai-python/commit/119970a31b67e88c623d50855290ccf3847c10eb)) + +## 1.28.0 (2024-05-09) + +Full Changelog: [v1.27.0...v1.28.0](https://github.com/openai/openai-python/compare/v1.27.0...v1.28.0) + +### Features + +* **api:** add message image content ([#1405](https://github.com/openai/openai-python/issues/1405)) ([a115de6](https://github.com/openai/openai-python/commit/a115de60ce1ca503a7659bb9a19c18699d4d9bcb)) + +## 1.27.0 (2024-05-08) + +Full Changelog: [v1.26.0...v1.27.0](https://github.com/openai/openai-python/compare/v1.26.0...v1.27.0) + +### Features + +* **api:** adding file purposes ([#1401](https://github.com/openai/openai-python/issues/1401)) ([2e9d0bd](https://github.com/openai/openai-python/commit/2e9d0bd0e4bf677ed9b21c6448e804313e026441)) + +## 1.26.0 (2024-05-06) + +Full Changelog: [v1.25.2...v1.26.0](https://github.com/openai/openai-python/compare/v1.25.2...v1.26.0) + +### Features + +* **api:** add usage metadata when streaming ([#1395](https://github.com/openai/openai-python/issues/1395)) ([3cb064b](https://github.com/openai/openai-python/commit/3cb064b10d661dbcc74b6bc1ed7d8e635ab2876a)) + +## 1.25.2 (2024-05-05) + +Full Changelog: [v1.25.1...v1.25.2](https://github.com/openai/openai-python/compare/v1.25.1...v1.25.2) + +### Documentation + +* **readme:** fix misleading timeout example value ([#1393](https://github.com/openai/openai-python/issues/1393)) ([3eba8e7](https://github.com/openai/openai-python/commit/3eba8e7573ec1bf4231a304c8eabc8a8d077f46d)) + +## 1.25.1 (2024-05-02) + +Full Changelog: [v1.25.0...v1.25.1](https://github.com/openai/openai-python/compare/v1.25.0...v1.25.1) + +### Chores + +* **internal:** bump prism version ([#1390](https://github.com/openai/openai-python/issues/1390)) ([a5830fc](https://github.com/openai/openai-python/commit/a5830fc1c5ffd21e2010490905084ad5614212a3)) + +## 1.25.0 (2024-05-01) + +Full Changelog: [v1.24.1...v1.25.0](https://github.com/openai/openai-python/compare/v1.24.1...v1.25.0) + +### Features + +* **api:** delete messages ([#1388](https://github.com/openai/openai-python/issues/1388)) ([d0597cd](https://github.com/openai/openai-python/commit/d0597cdc1813cddffacbaa50565e86d2420d1873)) + +## 1.24.1 (2024-04-30) + +Full Changelog: [v1.24.0...v1.24.1](https://github.com/openai/openai-python/compare/v1.24.0...v1.24.1) + +### Chores + +* **internal:** add link to openapi spec ([#1385](https://github.com/openai/openai-python/issues/1385)) ([b315d04](https://github.com/openai/openai-python/commit/b315d04e9624ec3a841d7c51813bb553640c23ce)) + +## 1.24.0 (2024-04-29) + +Full Changelog: [v1.23.6...v1.24.0](https://github.com/openai/openai-python/compare/v1.23.6...v1.24.0) + +### Features + +* **api:** add required tool_choice ([#1382](https://github.com/openai/openai-python/issues/1382)) ([c558f65](https://github.com/openai/openai-python/commit/c558f651df39f61425cd4109318f78ed94cbf163)) + + +### Chores + +* **client:** log response headers in debug mode ([#1383](https://github.com/openai/openai-python/issues/1383)) ([f31a426](https://github.com/openai/openai-python/commit/f31a4261adc4ebd92582cee264e41eb6a6dafc57)) +* **internal:** minor reformatting ([#1377](https://github.com/openai/openai-python/issues/1377)) ([7003dbb](https://github.com/openai/openai-python/commit/7003dbb863b6e16381070b8b86ac24aa070a3799)) +* **internal:** reformat imports ([#1375](https://github.com/openai/openai-python/issues/1375)) 
([2ad0c3b](https://github.com/openai/openai-python/commit/2ad0c3b8e0b746ed20db3c84a9c6a369aa10bf5d)) + +## 1.23.6 (2024-04-25) + +Full Changelog: [v1.23.5...v1.23.6](https://github.com/openai/openai-python/compare/v1.23.5...v1.23.6) + +### Chores + +* **internal:** update test helper function ([#1371](https://github.com/openai/openai-python/issues/1371)) ([6607c4a](https://github.com/openai/openai-python/commit/6607c4a491fd1912f9222d6fe464ccef6e865eac)) + +## 1.23.5 (2024-04-24) + +Full Changelog: [v1.23.4...v1.23.5](https://github.com/openai/openai-python/compare/v1.23.4...v1.23.5) + +### Chores + +* **internal:** use actions/checkout@v4 for codeflow ([#1368](https://github.com/openai/openai-python/issues/1368)) ([d1edf8b](https://github.com/openai/openai-python/commit/d1edf8beb806ebaefdcc2cb6e39f99e1811a2668)) + +## 1.23.4 (2024-04-24) + +Full Changelog: [v1.23.3...v1.23.4](https://github.com/openai/openai-python/compare/v1.23.3...v1.23.4) + +### Bug Fixes + +* **api:** change timestamps to unix integers ([#1367](https://github.com/openai/openai-python/issues/1367)) ([fbc0e15](https://github.com/openai/openai-python/commit/fbc0e15f422971bd15499d4ea5f42a1c885c7004)) +* **docs:** doc improvements ([#1364](https://github.com/openai/openai-python/issues/1364)) ([8c3a005](https://github.com/openai/openai-python/commit/8c3a005247ea045b9a95e7459eba2a90067daf71)) + + +### Chores + +* **tests:** rename test file ([#1366](https://github.com/openai/openai-python/issues/1366)) ([4204e63](https://github.com/openai/openai-python/commit/4204e63e27584c68ad27825261225603d7a87008)) + +## 1.23.3 (2024-04-23) + +Full Changelog: [v1.23.2...v1.23.3](https://github.com/openai/openai-python/compare/v1.23.2...v1.23.3) + +### Chores + +* **internal:** restructure imports ([#1359](https://github.com/openai/openai-python/issues/1359)) ([4e5eb37](https://github.com/openai/openai-python/commit/4e5eb374ea0545a6117db657bb05f6417bc62d18)) + +## 1.23.2 (2024-04-19) + +Full Changelog: [v1.23.1...v1.23.2](https://github.com/openai/openai-python/compare/v1.23.1...v1.23.2) + +### Bug Fixes + +* **api:** correct types for message attachment tools ([#1348](https://github.com/openai/openai-python/issues/1348)) ([78a6261](https://github.com/openai/openai-python/commit/78a6261eaad7839284903287d4f647d9cb4ced0b)) + +## 1.23.1 (2024-04-18) + +Full Changelog: [v1.23.0...v1.23.1](https://github.com/openai/openai-python/compare/v1.23.0...v1.23.1) + +### Bug Fixes + +* **api:** correct types for attachments ([#1342](https://github.com/openai/openai-python/issues/1342)) ([542d30c](https://github.com/openai/openai-python/commit/542d30c6dad4e139bf3eb443936d42b7b42dad54)) + +## 1.23.0 (2024-04-18) + +Full Changelog: [v1.22.0...v1.23.0](https://github.com/openai/openai-python/compare/v1.22.0...v1.23.0) + +### Features + +* **api:** add request id property to response classes ([#1341](https://github.com/openai/openai-python/issues/1341)) ([444d680](https://github.com/openai/openai-python/commit/444d680cbb3745adbc27788213ae3312567136a8)) + + +### Documentation + +* **helpers:** fix example snippets ([#1339](https://github.com/openai/openai-python/issues/1339)) ([8929088](https://github.com/openai/openai-python/commit/8929088b206a04b4c5b85fb69b0b983fb56f9b03)) + +## 1.22.0 (2024-04-18) + +Full Changelog: [v1.21.2...v1.22.0](https://github.com/openai/openai-python/compare/v1.21.2...v1.22.0) + +### Features + +* **api:** batch list endpoint ([#1338](https://github.com/openai/openai-python/issues/1338)) 
([a776f38](https://github.com/openai/openai-python/commit/a776f387e3159f9a8f4dcaa7d0d3b78c2a884f91)) + + +### Chores + +* **internal:** ban usage of lru_cache ([#1331](https://github.com/openai/openai-python/issues/1331)) ([8f9223b](https://github.com/openai/openai-python/commit/8f9223bfe13200c685fc97c25ada3015a69c6df7)) +* **internal:** bump pyright to 1.1.359 ([#1337](https://github.com/openai/openai-python/issues/1337)) ([feec0dd](https://github.com/openai/openai-python/commit/feec0dd1dd243941a279c3224c5ca1d727d76676)) + +## 1.21.2 (2024-04-17) + +Full Changelog: [v1.21.1...v1.21.2](https://github.com/openai/openai-python/compare/v1.21.1...v1.21.2) + +### Chores + +* **internal:** add lru_cache helper function ([#1329](https://github.com/openai/openai-python/issues/1329)) ([cbeebfc](https://github.com/openai/openai-python/commit/cbeebfcca8bf1a3feb4462a79e10099bda5bed84)) + +## 1.21.1 (2024-04-17) + +Full Changelog: [v1.21.0...v1.21.1](https://github.com/openai/openai-python/compare/v1.21.0...v1.21.1) + +### Chores + +* **api:** docs and response_format response property ([#1327](https://github.com/openai/openai-python/issues/1327)) ([7a6d142](https://github.com/openai/openai-python/commit/7a6d142f013994c4eb9a4f55888464c885f8baf0)) + +## 1.21.0 (2024-04-17) + +Full Changelog: [v1.20.0...v1.21.0](https://github.com/openai/openai-python/compare/v1.20.0...v1.21.0) + +### Features + +* **api:** add vector stores ([#1325](https://github.com/openai/openai-python/issues/1325)) ([038a3c5](https://github.com/openai/openai-python/commit/038a3c50db7b6a88f54ff1cd1ff6cbaef2caf87f)) + +## 1.20.0 (2024-04-16) + +Full Changelog: [v1.19.0...v1.20.0](https://github.com/openai/openai-python/compare/v1.19.0...v1.20.0) + +### Features + +* **client:** add header OpenAI-Project ([#1320](https://github.com/openai/openai-python/issues/1320)) ([0c489f1](https://github.com/openai/openai-python/commit/0c489f16a7d9e5ac753da87273b223893edefa69)) +* extract chat models to a named enum ([#1322](https://github.com/openai/openai-python/issues/1322)) ([1ccd9b6](https://github.com/openai/openai-python/commit/1ccd9b67322736a4714e58c953d59585322c527d)) + +## 1.19.0 (2024-04-15) + +Full Changelog: [v1.18.0...v1.19.0](https://github.com/openai/openai-python/compare/v1.18.0...v1.19.0) + +### Features + +* **errors:** add request_id property ([#1317](https://github.com/openai/openai-python/issues/1317)) ([f9eb77d](https://github.com/openai/openai-python/commit/f9eb77dca422b9456f4e3b31c7474046235eec1d)) + +## 1.18.0 (2024-04-15) + +Full Changelog: [v1.17.1...v1.18.0](https://github.com/openai/openai-python/compare/v1.17.1...v1.18.0) + +### Features + +* **api:** add batch API ([#1316](https://github.com/openai/openai-python/issues/1316)) ([3e6f19e](https://github.com/openai/openai-python/commit/3e6f19e6e7489bf1c94944a5f8f9b1d4535cdc43)) +* **api:** updates ([#1314](https://github.com/openai/openai-python/issues/1314)) ([8281dc9](https://github.com/openai/openai-python/commit/8281dc956178f5de345645660081f7d0c15a57a6)) + +## 1.17.1 (2024-04-12) + +Full Changelog: [v1.17.0...v1.17.1](https://github.com/openai/openai-python/compare/v1.17.0...v1.17.1) + +### Chores + +* fix typo ([#1304](https://github.com/openai/openai-python/issues/1304)) ([1129082](https://github.com/openai/openai-python/commit/1129082955f98d76c0927781ef9e7d0beeda2ec4)) +* **internal:** formatting ([#1311](https://github.com/openai/openai-python/issues/1311)) ([8fd411b](https://github.com/openai/openai-python/commit/8fd411b48b6b1eafaab2dac26201525c1ee0b942)) + +## 
1.17.0 (2024-04-10) + +Full Changelog: [v1.16.2...v1.17.0](https://github.com/openai/openai-python/compare/v1.16.2...v1.17.0) + +### Features + +* **api:** add additional messages when creating thread run ([#1298](https://github.com/openai/openai-python/issues/1298)) ([70eb081](https://github.com/openai/openai-python/commit/70eb081804b14cc8c151ebd85458545a50a074fd)) +* **client:** add DefaultHttpxClient and DefaultAsyncHttpxClient ([#1302](https://github.com/openai/openai-python/issues/1302)) ([69cdfc3](https://github.com/openai/openai-python/commit/69cdfc319fff7ebf28cdd13cc6c1761b7d97811d)) +* **models:** add to_dict & to_json helper methods ([#1305](https://github.com/openai/openai-python/issues/1305)) ([40a881d](https://github.com/openai/openai-python/commit/40a881d10442af8b445ce030f8ab338710e1c4c8)) + +## 1.16.2 (2024-04-04) + +Full Changelog: [v1.16.1...v1.16.2](https://github.com/openai/openai-python/compare/v1.16.1...v1.16.2) + +### Bug Fixes + +* **client:** correct logic for line decoding in streaming ([#1293](https://github.com/openai/openai-python/issues/1293)) ([687caef](https://github.com/openai/openai-python/commit/687caefa4acf615bf404f16817bfd9a6f285ee5c)) + +## 1.16.1 (2024-04-02) + +Full Changelog: [v1.16.0...v1.16.1](https://github.com/openai/openai-python/compare/v1.16.0...v1.16.1) + +### Chores + +* **internal:** defer model build for import latency ([#1291](https://github.com/openai/openai-python/issues/1291)) ([bc6866e](https://github.com/openai/openai-python/commit/bc6866eb2335d01532190d0906cad7bf9af28621)) + +## 1.16.0 (2024-04-01) + +Full Changelog: [v1.15.0...v1.16.0](https://github.com/openai/openai-python/compare/v1.15.0...v1.16.0) + +### Features + +* **api:** add support for filtering messages by run_id ([#1288](https://github.com/openai/openai-python/issues/1288)) ([58d6b77](https://github.com/openai/openai-python/commit/58d6b773218ef1dd8dc6208124a16078e4ac11c1)) +* **api:** run polling helpers ([#1289](https://github.com/openai/openai-python/issues/1289)) ([6b427f3](https://github.com/openai/openai-python/commit/6b427f38610847bce3ce5334177f07917bd7c187)) + + +### Chores + +* **client:** validate that max_retries is not None ([#1286](https://github.com/openai/openai-python/issues/1286)) ([aa5920a](https://github.com/openai/openai-python/commit/aa5920af6131c49a44352524154ee4a1684e76b2)) + + +### Refactors + +* rename createAndStream to stream ([6b427f3](https://github.com/openai/openai-python/commit/6b427f38610847bce3ce5334177f07917bd7c187)) + +## 1.15.0 (2024-03-31) + +Full Changelog: [v1.14.3...v1.15.0](https://github.com/openai/openai-python/compare/v1.14.3...v1.15.0) + +### Features + +* **api:** adding temperature parameter ([#1282](https://github.com/openai/openai-python/issues/1282)) ([0e68fd3](https://github.com/openai/openai-python/commit/0e68fd3690155785d1fb0ee9a8604f51e6701b1d)) +* **client:** increase default HTTP max_connections to 1000 and max_keepalive_connections to 100 ([#1281](https://github.com/openai/openai-python/issues/1281)) ([340d139](https://github.com/openai/openai-python/commit/340d1391e3071a265ed12c0a8d70d4d73a860bd8)) +* **package:** export default constants ([#1275](https://github.com/openai/openai-python/issues/1275)) ([fdc126e](https://github.com/openai/openai-python/commit/fdc126e428320f1bed5eabd3eed229f08ab9effa)) + + +### Bug Fixes + +* **project:** use absolute github links on PyPi ([#1280](https://github.com/openai/openai-python/issues/1280)) 
([94cd528](https://github.com/openai/openai-python/commit/94cd52837650e5b7e115119d69e6b1c7ba1f6bf1)) + + +### Chores + +* **internal:** bump dependencies ([#1273](https://github.com/openai/openai-python/issues/1273)) ([18dcd65](https://github.com/openai/openai-python/commit/18dcd654d9f54628b5fe21a499d1fef500e15f7f)) + + +### Documentation + +* **readme:** change undocumented params wording ([#1284](https://github.com/openai/openai-python/issues/1284)) ([7498ef1](https://github.com/openai/openai-python/commit/7498ef1e9568200086ba3efb99ea100feb05e3f0)) + +## 1.14.3 (2024-03-25) + +Full Changelog: [v1.14.2...v1.14.3](https://github.com/openai/openai-python/compare/v1.14.2...v1.14.3) + +### Bug Fixes + +* revert regression with 3.7 support ([#1269](https://github.com/openai/openai-python/issues/1269)) ([37aed56](https://github.com/openai/openai-python/commit/37aed564143dc7281f1eaa6ab64ec5ca334cf25e)) + + +### Chores + +* **internal:** construct error properties instead of using the raw response ([#1257](https://github.com/openai/openai-python/issues/1257)) ([11dce5c](https://github.com/openai/openai-python/commit/11dce5c66395722b245f5d5461ce379ca7b939e4)) +* **internal:** formatting change ([#1258](https://github.com/openai/openai-python/issues/1258)) ([b907dd7](https://github.com/openai/openai-python/commit/b907dd7dcae895e4209559da061d0991a8d640a6)) +* **internal:** loosen input type for util function ([#1250](https://github.com/openai/openai-python/issues/1250)) ([fc8b4c3](https://github.com/openai/openai-python/commit/fc8b4c37dc91dfcc0535c19236092992171784a0)) + + +### Documentation + +* **contributing:** fix typo ([#1264](https://github.com/openai/openai-python/issues/1264)) ([835cb9b](https://github.com/openai/openai-python/commit/835cb9b2f92e2aa3329545b4677865dcd4fd00f0)) +* **readme:** consistent use of sentence case in headings ([#1255](https://github.com/openai/openai-python/issues/1255)) ([519f371](https://github.com/openai/openai-python/commit/519f371af779b5fa353292ff5a2d3332afe0987e)) +* **readme:** document how to make undocumented requests ([#1256](https://github.com/openai/openai-python/issues/1256)) ([5887858](https://github.com/openai/openai-python/commit/5887858a7b649dfde5b733ef01e5cffcf953b2a7)) + +## 1.14.2 (2024-03-19) + +Full Changelog: [v1.14.1...v1.14.2](https://github.com/openai/openai-python/compare/v1.14.1...v1.14.2) + +### Performance Improvements + +* cache TypeAdapters ([#1114](https://github.com/openai/openai-python/issues/1114)) ([41b6fee](https://github.com/openai/openai-python/commit/41b6feec70d3f203e36ba9a92205389bafce930c)) +* cache TypeAdapters ([#1243](https://github.com/openai/openai-python/issues/1243)) ([2005076](https://github.com/openai/openai-python/commit/2005076f500bef6e0a6cc8f935b9cc9fef65ab5b)) + + +### Chores + +* **internal:** update generated pragma comment ([#1247](https://github.com/openai/openai-python/issues/1247)) ([3eeb9b3](https://github.com/openai/openai-python/commit/3eeb9b3a71e01c2593be443a97a353371466d01a)) + + +### Documentation + +* assistant improvements ([#1249](https://github.com/openai/openai-python/issues/1249)) ([e7a3176](https://github.com/openai/openai-python/commit/e7a3176b7606822bd5ad8f7fece87de6aad1e5b6)) +* fix typo in CONTRIBUTING.md ([#1245](https://github.com/openai/openai-python/issues/1245)) ([adef57a](https://github.com/openai/openai-python/commit/adef57ae5c71734873ba49bccd92fa7f28068d28)) + +## 1.14.1 (2024-03-15) + +Full Changelog: 
[v1.14.0...v1.14.1](https://github.com/openai/openai-python/compare/v1.14.0...v1.14.1) + +### Documentation + +* **readme:** assistant streaming ([#1238](https://github.com/openai/openai-python/issues/1238)) ([0fc30a2](https://github.com/openai/openai-python/commit/0fc30a23030b4ff60f27cd2f472517926ed0f300)) + +## 1.14.0 (2024-03-13) + +Full Changelog: [v1.13.4...v1.14.0](https://github.com/openai/openai-python/compare/v1.13.4...v1.14.0) + +### Features + +* **assistants:** add support for streaming ([#1233](https://github.com/openai/openai-python/issues/1233)) ([17635dc](https://github.com/openai/openai-python/commit/17635dccbeddf153f8201dbca18b44e16a1799b2)) + +## 1.13.4 (2024-03-13) + +Full Changelog: [v1.13.3...v1.13.4](https://github.com/openai/openai-python/compare/v1.13.3...v1.13.4) + +### Bug Fixes + +* **streaming:** improve error messages ([#1218](https://github.com/openai/openai-python/issues/1218)) ([4f5ff29](https://github.com/openai/openai-python/commit/4f5ff298601b5a8bfbf0a9d0c0d1329d1502a205)) + + +### Chores + +* **api:** update docs ([#1212](https://github.com/openai/openai-python/issues/1212)) ([71236e0](https://github.com/openai/openai-python/commit/71236e0de4012a249af4c1ffd95973a8ba4fa61f)) +* **client:** improve error message for invalid http_client argument ([#1216](https://github.com/openai/openai-python/issues/1216)) ([d0c928a](https://github.com/openai/openai-python/commit/d0c928abbd99020fe828350f3adfd10c638a2eed)) +* **docs:** mention install from git repo ([#1203](https://github.com/openai/openai-python/issues/1203)) ([3ab6f44](https://github.com/openai/openai-python/commit/3ab6f447ffd8d2394e58416e401e545a99ec85af)) +* export NOT_GIVEN sentinel value ([#1223](https://github.com/openai/openai-python/issues/1223)) ([8a4f76f](https://github.com/openai/openai-python/commit/8a4f76f992c66f20cd6aa070c8dc4839e4cf9f3c)) +* **internal:** add core support for deserializing into number response ([#1219](https://github.com/openai/openai-python/issues/1219)) ([004bc92](https://github.com/openai/openai-python/commit/004bc924ea579852b9266ca11aea93463cf75104)) +* **internal:** bump pyright ([#1221](https://github.com/openai/openai-python/issues/1221)) ([3c2e815](https://github.com/openai/openai-python/commit/3c2e815311ace4ff81ccd446b23ff50a4e099485)) +* **internal:** improve deserialisation of discriminated unions ([#1227](https://github.com/openai/openai-python/issues/1227)) ([4767259](https://github.com/openai/openai-python/commit/4767259d25ac135550b37b15e4c0497e5ff0330d)) +* **internal:** minor core client restructuring ([#1199](https://github.com/openai/openai-python/issues/1199)) ([4314cdc](https://github.com/openai/openai-python/commit/4314cdcd522537e6cbbd87206d5bb236f672ce05)) +* **internal:** split up transforms into sync / async ([#1210](https://github.com/openai/openai-python/issues/1210)) ([7853a83](https://github.com/openai/openai-python/commit/7853a8358864957cc183581bdf7c03810a7b2756)) +* **internal:** support more input types ([#1211](https://github.com/openai/openai-python/issues/1211)) ([d0e4baa](https://github.com/openai/openai-python/commit/d0e4baa40d32c2da0ce5ceef8e0c7193b98f2b5a)) +* **internal:** support parsing Annotated types ([#1222](https://github.com/openai/openai-python/issues/1222)) ([8598f81](https://github.com/openai/openai-python/commit/8598f81841eeab0ab00eb21fdec7e8756ffde909)) +* **types:** include discriminators in unions ([#1228](https://github.com/openai/openai-python/issues/1228)) 
([3ba0dcc](https://github.com/openai/openai-python/commit/3ba0dcc19a2af0ef869c77da2805278f71ee96c2)) + + +### Documentation + +* **contributing:** improve wording ([#1201](https://github.com/openai/openai-python/issues/1201)) ([95a1e0e](https://github.com/openai/openai-python/commit/95a1e0ea8e5446c413606847ebf9e35afbc62bf9)) + +## 1.13.3 (2024-02-28) + +Full Changelog: [v1.13.2...v1.13.3](https://github.com/openai/openai-python/compare/v1.13.2...v1.13.3) + +### Features + +* **api:** add wav and pcm to response_format ([#1189](https://github.com/openai/openai-python/issues/1189)) ([dbd20fc](https://github.com/openai/openai-python/commit/dbd20fc42e93358261f71b9aa0e5f955053c3825)) + + +### Chores + +* **client:** use anyio.sleep instead of asyncio.sleep ([#1198](https://github.com/openai/openai-python/issues/1198)) ([b6d025b](https://github.com/openai/openai-python/commit/b6d025b54f091e79f5d4a0a8923f29574fd66027)) +* **internal:** bump pyright ([#1193](https://github.com/openai/openai-python/issues/1193)) ([9202e04](https://github.com/openai/openai-python/commit/9202e04d07a7c47232f39196346c734869b8f55a)) +* **types:** extract run status to a named type ([#1178](https://github.com/openai/openai-python/issues/1178)) ([249ecbd](https://github.com/openai/openai-python/commit/249ecbdeb6566a385ec46dfd5000b4eaa03965f0)) + + +### Documentation + +* add note in azure_deployment docstring ([#1188](https://github.com/openai/openai-python/issues/1188)) ([96fa995](https://github.com/openai/openai-python/commit/96fa99572dd76ee708f2bae04d11b659cdd698b2)) +* **examples:** add pyaudio streaming example ([#1194](https://github.com/openai/openai-python/issues/1194)) ([3683c5e](https://github.com/openai/openai-python/commit/3683c5e3c7f07e4b789a0c4cc417b2c59539cae2)) + +## 1.13.2 (2024-02-20) + +Full Changelog: [v1.13.1...v1.13.2](https://github.com/openai/openai-python/compare/v1.13.1...v1.13.2) + +### Bug Fixes + +* **ci:** revert "move github release logic to github app" ([#1170](https://github.com/openai/openai-python/issues/1170)) ([f1adc2e](https://github.com/openai/openai-python/commit/f1adc2e6f2f29acb4404e84137a9d3109714c585)) + +## 1.13.1 (2024-02-20) + +Full Changelog: [v1.13.0...v1.13.1](https://github.com/openai/openai-python/compare/v1.13.0...v1.13.1) + +### Chores + +* **internal:** bump rye to v0.24.0 ([#1168](https://github.com/openai/openai-python/issues/1168)) ([84c4256](https://github.com/openai/openai-python/commit/84c4256316f2a79068ecadb852e5e69b6b104a1f)) + +## 1.13.0 (2024-02-19) + +Full Changelog: [v1.12.0...v1.13.0](https://github.com/openai/openai-python/compare/v1.12.0...v1.13.0) + +### Features + +* **api:** updates ([#1146](https://github.com/openai/openai-python/issues/1146)) ([79b7675](https://github.com/openai/openai-python/commit/79b7675e51fb7d269a6ea281a568bc7812ba2ace)) + + +### Bug Fixes + +* **api:** remove non-GA instance_id param ([#1164](https://github.com/openai/openai-python/issues/1164)) ([1abe139](https://github.com/openai/openai-python/commit/1abe139b1a5f5cc41263738fc12856056dce5697)) + + +### Chores + +* **ci:** move github release logic to github app ([#1155](https://github.com/openai/openai-python/issues/1155)) ([67cfac2](https://github.com/openai/openai-python/commit/67cfac2564dfb718da0465e34b90ac6928fa962a)) +* **client:** use correct accept headers for binary data ([#1161](https://github.com/openai/openai-python/issues/1161)) ([e536437](https://github.com/openai/openai-python/commit/e536437ae0b2cb0ddf2d74618722005d37403f32)) +* **internal:** refactor release 
environment script ([#1158](https://github.com/openai/openai-python/issues/1158)) ([7fe8ec3](https://github.com/openai/openai-python/commit/7fe8ec3bf04ecf85e3bd5adf0d9992c051f87b81)) + +## 1.12.0 (2024-02-08) + +Full Changelog: [v1.11.1...v1.12.0](https://github.com/openai/openai-python/compare/v1.11.1...v1.12.0) + +### Features + +* **api:** add `timestamp_granularities`, add `gpt-3.5-turbo-0125` model ([#1125](https://github.com/openai/openai-python/issues/1125)) ([1ecf8f6](https://github.com/openai/openai-python/commit/1ecf8f6b12323ed09fb6a2815c85b9533ee52a50)) +* **cli/images:** add support for `--model` arg ([#1132](https://github.com/openai/openai-python/issues/1132)) ([0d53866](https://github.com/openai/openai-python/commit/0d5386615cda7cd50d5db90de2119b84dba29519)) + + +### Bug Fixes + +* remove double brackets from timestamp_granularities param ([#1140](https://github.com/openai/openai-python/issues/1140)) ([3db0222](https://github.com/openai/openai-python/commit/3db022216a81fa86470b53ec1246669bc7b17897)) +* **types:** loosen most List params types to Iterable ([#1129](https://github.com/openai/openai-python/issues/1129)) ([bdb31a3](https://github.com/openai/openai-python/commit/bdb31a3b1db6ede4e02b3c951c4fd23f70260038)) + + +### Chores + +* **internal:** add lint command ([#1128](https://github.com/openai/openai-python/issues/1128)) ([4c021c0](https://github.com/openai/openai-python/commit/4c021c0ab0151c2ec092d860c9b60e22e658cd03)) +* **internal:** support serialising iterable types ([#1127](https://github.com/openai/openai-python/issues/1127)) ([98d4e59](https://github.com/openai/openai-python/commit/98d4e59afcf2d65d4e660d91eb9462240ef5cd63)) + + +### Documentation + +* add CONTRIBUTING.md ([#1138](https://github.com/openai/openai-python/issues/1138)) ([79c8f0e](https://github.com/openai/openai-python/commit/79c8f0e8bf5470e2e31e781e8d279331e89ddfbe)) + +## 1.11.1 (2024-02-04) + +Full Changelog: [v1.11.0...v1.11.1](https://github.com/openai/openai-python/compare/v1.11.0...v1.11.1) + +### Bug Fixes + +* prevent crash when platform.architecture() is not allowed ([#1120](https://github.com/openai/openai-python/issues/1120)) ([9490554](https://github.com/openai/openai-python/commit/949055488488e93597cbc6c2cdd81f14f203e53b)) + +## 1.11.0 (2024-02-03) + +Full Changelog: [v1.10.0...v1.11.0](https://github.com/openai/openai-python/compare/v1.10.0...v1.11.0) + +### Features + +* **client:** support parsing custom response types ([#1111](https://github.com/openai/openai-python/issues/1111)) ([da00fc3](https://github.com/openai/openai-python/commit/da00fc3f8e0ff13c6c3ca970e4bb86846304bd06)) + + +### Chores + +* **interal:** make link to api.md relative ([#1117](https://github.com/openai/openai-python/issues/1117)) ([4a10879](https://github.com/openai/openai-python/commit/4a108797e46293357601ce933e21b557a5dc6954)) +* **internal:** cast type in mocked test ([#1112](https://github.com/openai/openai-python/issues/1112)) ([99b21e1](https://github.com/openai/openai-python/commit/99b21e1fc681eb10e01d479cc043ad3c89272b1c)) +* **internal:** enable ruff type checking misuse lint rule ([#1106](https://github.com/openai/openai-python/issues/1106)) ([fa63e60](https://github.com/openai/openai-python/commit/fa63e605c82ec78f4fc27469c434b421a08fb909)) +* **internal:** support multipart data with overlapping keys ([#1104](https://github.com/openai/openai-python/issues/1104)) ([455bc9f](https://github.com/openai/openai-python/commit/455bc9f1fd018a32cd604eb4b400e05aa8d71822)) +* **internal:** support 
pre-release versioning ([#1113](https://github.com/openai/openai-python/issues/1113)) ([dea5b08](https://github.com/openai/openai-python/commit/dea5b08c28d47b331fd44f6920cf9fe322b68e51)) + +## 1.10.0 (2024-01-25) + +Full Changelog: [v1.9.0...v1.10.0](https://github.com/openai/openai-python/compare/v1.9.0...v1.10.0) + +### Features + +* **api:** add text embeddings dimensions param ([#1103](https://github.com/openai/openai-python/issues/1103)) ([94abfa0](https://github.com/openai/openai-python/commit/94abfa0f988c199ea95a9c870c4ae9808823186d)) +* **azure:** proactively add audio/speech to deployment endpoints ([#1099](https://github.com/openai/openai-python/issues/1099)) ([fdf8742](https://github.com/openai/openai-python/commit/fdf87429b45ceb47ae6fd068ab70cc07bcb8da44)) +* **client:** enable follow redirects by default ([#1100](https://github.com/openai/openai-python/issues/1100)) ([d325b7c](https://github.com/openai/openai-python/commit/d325b7ca594c2abaada536249b5633b106943333)) + + +### Chores + +* **internal:** add internal helpers ([#1092](https://github.com/openai/openai-python/issues/1092)) ([629bde5](https://github.com/openai/openai-python/commit/629bde5800d84735e22d924db23109a141f48644)) + + +### Refactors + +* remove unnecessary builtin import ([#1094](https://github.com/openai/openai-python/issues/1094)) ([504b7d4](https://github.com/openai/openai-python/commit/504b7d4a0b4715bd49a1a076a8d4868e51fb3351)) + +## 1.9.0 (2024-01-21) + +Full Changelog: [v1.8.0...v1.9.0](https://github.com/openai/openai-python/compare/v1.8.0...v1.9.0) + +### Features + +* **api:** add usage to runs and run steps ([#1090](https://github.com/openai/openai-python/issues/1090)) ([6c116df](https://github.com/openai/openai-python/commit/6c116dfbb0065d15050450df70e0e98fc8c80349)) + + +### Chores + +* **internal:** fix typing util function ([#1083](https://github.com/openai/openai-python/issues/1083)) ([3e60db6](https://github.com/openai/openai-python/commit/3e60db69f5d9187c4eb38451967259f534a36a82)) +* **internal:** remove redundant client test ([#1085](https://github.com/openai/openai-python/issues/1085)) ([947974f](https://github.com/openai/openai-python/commit/947974f5af726e252b7b12c863743e50f41b79d3)) +* **internal:** share client instances between all tests ([#1088](https://github.com/openai/openai-python/issues/1088)) ([05cd753](https://github.com/openai/openai-python/commit/05cd7531d40774d05c52b14dee54d137ac1452a3)) +* **internal:** speculative retry-after-ms support ([#1086](https://github.com/openai/openai-python/issues/1086)) ([36a7576](https://github.com/openai/openai-python/commit/36a7576a913be8509a3cf6f262543083b485136e)) +* lazy load raw resource class properties ([#1087](https://github.com/openai/openai-python/issues/1087)) ([d307127](https://github.com/openai/openai-python/commit/d30712744be07461e86763705c03c3495eadfc35)) + +## 1.8.0 (2024-01-16) + +Full Changelog: [v1.7.2...v1.8.0](https://github.com/openai/openai-python/compare/v1.7.2...v1.8.0) + +### Features + +* **client:** add support for streaming raw responses ([#1072](https://github.com/openai/openai-python/issues/1072)) ([0e93c3b](https://github.com/openai/openai-python/commit/0e93c3b5bc9cfa041e91962fd82c0d9358125024)) + + +### Bug Fixes + +* **client:** ensure path params are non-empty ([#1075](https://github.com/openai/openai-python/issues/1075)) ([9a25149](https://github.com/openai/openai-python/commit/9a2514997c2ddccbec9df8be3773e83271f1dab8)) +* **proxy:** prevent recursion errors when debugging pycharm 
([#1076](https://github.com/openai/openai-python/issues/1076)) ([3d78798](https://github.com/openai/openai-python/commit/3d787987cf7625b5b502cb0b63a37d55956eaf1d)) + + +### Chores + +* add write_to_file binary helper method ([#1077](https://github.com/openai/openai-python/issues/1077)) ([c622c6a](https://github.com/openai/openai-python/commit/c622c6aaf2ae7dc62bd6cdfc053204c5dc3293ac)) + +## 1.7.2 (2024-01-12) + +Full Changelog: [v1.7.1...v1.7.2](https://github.com/openai/openai-python/compare/v1.7.1...v1.7.2) + +### Documentation + +* **readme:** improve api reference ([#1065](https://github.com/openai/openai-python/issues/1065)) ([745b9e0](https://github.com/openai/openai-python/commit/745b9e08ae0abb8bf4cd87ed40fa450d9ad81ede)) + + +### Refactors + +* **api:** remove deprecated endpoints ([#1067](https://github.com/openai/openai-python/issues/1067)) ([199ddcd](https://github.com/openai/openai-python/commit/199ddcdca00c136e4e0c3ff16521eff22acf2a1a)) + +## 1.7.1 (2024-01-10) + +Full Changelog: [v1.7.0...v1.7.1](https://github.com/openai/openai-python/compare/v1.7.0...v1.7.1) + +### Chores + +* **client:** improve debug logging for failed requests ([#1060](https://github.com/openai/openai-python/issues/1060)) ([cf9a651](https://github.com/openai/openai-python/commit/cf9a6517b4aa0f24bcbe143c54ea908d43dfda92)) + +## 1.7.0 (2024-01-08) + +Full Changelog: [v1.6.1...v1.7.0](https://github.com/openai/openai-python/compare/v1.6.1...v1.7.0) + +### Features + +* add `None` default value to nullable response properties ([#1043](https://github.com/openai/openai-python/issues/1043)) ([d94b4d3](https://github.com/openai/openai-python/commit/d94b4d3d0adcd1a49a1c25cc9730cef013a3e9c9)) + + +### Bug Fixes + +* **client:** correctly use custom http client auth ([#1028](https://github.com/openai/openai-python/issues/1028)) ([3d7d93e](https://github.com/openai/openai-python/commit/3d7d93e951eb7fe09cd9d94d10a62a020398c7f9)) + + +### Chores + +* add .keep files for examples and custom code directories ([#1057](https://github.com/openai/openai-python/issues/1057)) ([7524097](https://github.com/openai/openai-python/commit/7524097a47af0fdc8b560186ef3b111b59430741)) +* **internal:** bump license ([#1037](https://github.com/openai/openai-python/issues/1037)) ([d828527](https://github.com/openai/openai-python/commit/d828527540ebd97679075f48744818f06311b0cb)) +* **internal:** loosen type var restrictions ([#1049](https://github.com/openai/openai-python/issues/1049)) ([e00876b](https://github.com/openai/openai-python/commit/e00876b20b93038450eb317899d8775c7661b8eb)) +* **internal:** replace isort with ruff ([#1042](https://github.com/openai/openai-python/issues/1042)) ([f1fbc9c](https://github.com/openai/openai-python/commit/f1fbc9c0d62e7d89ab32c8bdfa39cd94b560690b)) +* **internal:** update formatting ([#1041](https://github.com/openai/openai-python/issues/1041)) ([2e9ecee](https://github.com/openai/openai-python/commit/2e9ecee9bdfa8ec33b1b1527d5187483b700fad3)) +* **src:** fix typos ([#988](https://github.com/openai/openai-python/issues/988)) ([6a8b806](https://github.com/openai/openai-python/commit/6a8b80624636f9a0e5ada151b2509710a6f74808)) +* use property declarations for resource members ([#1047](https://github.com/openai/openai-python/issues/1047)) ([131f6bc](https://github.com/openai/openai-python/commit/131f6bc6b0ccf79119096057079e10906b3d4678)) + + +### Documentation + +* fix docstring typos ([#1022](https://github.com/openai/openai-python/issues/1022)) 
([ad3fd2c](https://github.com/openai/openai-python/commit/ad3fd2cd19bf91f94473e368554dff39a8f9ad16)) +* improve audio example to show how to stream to a file ([#1017](https://github.com/openai/openai-python/issues/1017)) ([d45ed7f](https://github.com/openai/openai-python/commit/d45ed7f0513b167555ae875f1877fa205c5790d2)) + +## 1.6.1 (2023-12-22) + +Full Changelog: [v1.6.0...v1.6.1](https://github.com/openai/openai-python/compare/v1.6.0...v1.6.1) + +### Chores + +* **internal:** add bin script ([#1001](https://github.com/openai/openai-python/issues/1001)) ([99ffbda](https://github.com/openai/openai-python/commit/99ffbda279bf4c159511fb96b1d5bb688af25437)) +* **internal:** use ruff instead of black for formatting ([#1008](https://github.com/openai/openai-python/issues/1008)) ([ceaf9a0](https://github.com/openai/openai-python/commit/ceaf9a06fbd1a846756bb72cce50a69c8cc20bd3)) + +## 1.6.0 (2023-12-19) + +Full Changelog: [v1.5.0...v1.6.0](https://github.com/openai/openai-python/compare/v1.5.0...v1.6.0) + +### Features + +* **api:** add additional instructions for runs ([#995](https://github.com/openai/openai-python/issues/995)) ([7bf9b75](https://github.com/openai/openai-python/commit/7bf9b75067905449e83e828c12eb384022cff6ca)) + + +### Chores + +* **cli:** fix typo in completions ([#985](https://github.com/openai/openai-python/issues/985)) ([d1e9e8f](https://github.com/openai/openai-python/commit/d1e9e8f24df366bb7b796c55a98247c025d229f5)) +* **cli:** fix typo in completions ([#986](https://github.com/openai/openai-python/issues/986)) ([626bc34](https://github.com/openai/openai-python/commit/626bc34d82a7057bac99f8b556f9e5f60c261ee7)) +* **internal:** fix binary response tests ([#983](https://github.com/openai/openai-python/issues/983)) ([cfb7e30](https://github.com/openai/openai-python/commit/cfb7e308393f2e912e959dd10d68096dd5b3ab9c)) +* **internal:** fix typos ([#993](https://github.com/openai/openai-python/issues/993)) ([3b338a4](https://github.com/openai/openai-python/commit/3b338a401b206618774291ff8137deb0cc5f6b4c)) +* **internal:** minor utils restructuring ([#992](https://github.com/openai/openai-python/issues/992)) ([5ba576a](https://github.com/openai/openai-python/commit/5ba576ae38d2c4c4d32a21933e0d68e0bc2f0d49)) +* **package:** bump minimum typing-extensions to 4.7 ([#994](https://github.com/openai/openai-python/issues/994)) ([0c2da84](https://github.com/openai/openai-python/commit/0c2da84badf416f8b2213983f68bd2b6f9e52f2b)) +* **streaming:** update constructor to use direct client names ([#991](https://github.com/openai/openai-python/issues/991)) ([6c3427d](https://github.com/openai/openai-python/commit/6c3427dac8c414658516aeb4caf5d5fd8b11097b)) + + +### Documentation + +* upgrade models in examples to latest version ([#989](https://github.com/openai/openai-python/issues/989)) ([cedd574](https://github.com/openai/openai-python/commit/cedd574e5611f3e71e92b523a72ba87bcfe546f1)) + +## 1.5.0 (2023-12-17) + +Full Changelog: [v1.4.0...v1.5.0](https://github.com/openai/openai-python/compare/v1.4.0...v1.5.0) + +### Features + +* **api:** add token logprobs to chat completions ([#980](https://github.com/openai/openai-python/issues/980)) ([f50e962](https://github.com/openai/openai-python/commit/f50e962b930bd682a4299143b2995337e8571273)) + + +### Chores + +* **ci:** run release workflow once per day ([#978](https://github.com/openai/openai-python/issues/978)) ([215476a](https://github.com/openai/openai-python/commit/215476a0b99e0c92ab3e44ddd25de207af32d160)) + +## 1.4.0 (2023-12-15) + +Full 
Changelog: [v1.3.9...v1.4.0](https://github.com/openai/openai-python/compare/v1.3.9...v1.4.0) + +### Features + +* **api:** add optional `name` argument + improve docs ([#972](https://github.com/openai/openai-python/issues/972)) ([7972010](https://github.com/openai/openai-python/commit/7972010615820099f662c02821cfbd59e7d6ea44)) + +## 1.3.9 (2023-12-12) + +Full Changelog: [v1.3.8...v1.3.9](https://github.com/openai/openai-python/compare/v1.3.8...v1.3.9) + +### Documentation + +* improve README timeout comment ([#964](https://github.com/openai/openai-python/issues/964)) ([3c3ed5e](https://github.com/openai/openai-python/commit/3c3ed5edd938a9333e8d2fa47cb4b44178eef89a)) +* small Improvement in the async chat response code ([#959](https://github.com/openai/openai-python/issues/959)) ([fb9d0a3](https://github.com/openai/openai-python/commit/fb9d0a358fa232043d9d5c149b6a888d50127c7b)) +* small streaming readme improvements ([#962](https://github.com/openai/openai-python/issues/962)) ([f3be2e5](https://github.com/openai/openai-python/commit/f3be2e5cc24988471e6cedb3e34bdfd3123edc63)) + + +### Refactors + +* **client:** simplify cleanup ([#966](https://github.com/openai/openai-python/issues/966)) ([5c138f4](https://github.com/openai/openai-python/commit/5c138f4a7947e5b4aae8779fae78ca51269b355a)) +* simplify internal error handling ([#968](https://github.com/openai/openai-python/issues/968)) ([d187f6b](https://github.com/openai/openai-python/commit/d187f6b6e4e646cca39c6ca35c618aa5c1bfbd61)) + +## 1.3.8 (2023-12-08) + +Full Changelog: [v1.3.7...v1.3.8](https://github.com/openai/openai-python/compare/v1.3.7...v1.3.8) + +### Bug Fixes + +* avoid leaking memory when Client.with_options is used ([#956](https://github.com/openai/openai-python/issues/956)) ([e37ecca](https://github.com/openai/openai-python/commit/e37ecca04040ce946822a7e40f5604532a59ee85)) +* **errors:** properly assign APIError.body ([#949](https://github.com/openai/openai-python/issues/949)) ([c70e194](https://github.com/openai/openai-python/commit/c70e194f0a253409ec851607ae5219e3b5a8c442)) +* **pagination:** use correct type hint for .object ([#943](https://github.com/openai/openai-python/issues/943)) ([23fe7ee](https://github.com/openai/openai-python/commit/23fe7ee48a71539b0d1e95ceff349264aae4090e)) + + +### Chores + +* **internal:** enable more lint rules ([#945](https://github.com/openai/openai-python/issues/945)) ([2c8add6](https://github.com/openai/openai-python/commit/2c8add64a261dea731bd162bb0cca222518d5440)) +* **internal:** reformat imports ([#939](https://github.com/openai/openai-python/issues/939)) ([ec65124](https://github.com/openai/openai-python/commit/ec651249de2f4e4cf959f816e1b52f03d3b1017a)) +* **internal:** reformat imports ([#944](https://github.com/openai/openai-python/issues/944)) ([5290639](https://github.com/openai/openai-python/commit/52906391c9b6633656ec7934e6bbac553ec667cd)) +* **internal:** update formatting ([#941](https://github.com/openai/openai-python/issues/941)) ([8e5a156](https://github.com/openai/openai-python/commit/8e5a156d555fe68731ba0604a7455cc03cb451ce)) +* **package:** lift anyio v4 restriction ([#927](https://github.com/openai/openai-python/issues/927)) ([be0438a](https://github.com/openai/openai-python/commit/be0438a2e399bb0e0a94907229d02fc61ab479c0)) + + +### Documentation + +* fix typo in example ([#950](https://github.com/openai/openai-python/issues/950)) ([54f0ce0](https://github.com/openai/openai-python/commit/54f0ce0000abe32e97ae400f2975c028b8a84273)) + +## 1.3.7 (2023-12-01) + +Full 
Changelog: [v1.3.6...v1.3.7](https://github.com/openai/openai-python/compare/v1.3.6...v1.3.7) + +### Bug Fixes + +* **client:** correct base_url setter implementation ([#919](https://github.com/openai/openai-python/issues/919)) ([135d9cf](https://github.com/openai/openai-python/commit/135d9cf2820f1524764bf536a9322830bdcd5875)) +* **client:** don't cause crashes when inspecting the module ([#897](https://github.com/openai/openai-python/issues/897)) ([db029a5](https://github.com/openai/openai-python/commit/db029a596c90b1af4ef0bfb1cdf31f54b2f5755d)) +* **client:** ensure retried requests are closed ([#902](https://github.com/openai/openai-python/issues/902)) ([e025e6b](https://github.com/openai/openai-python/commit/e025e6bee44ea145d948869ef0c79bac0c376b9f)) + + +### Chores + +* **internal:** add tests for proxy change ([#899](https://github.com/openai/openai-python/issues/899)) ([71a13d0](https://github.com/openai/openai-python/commit/71a13d0c70d105b2b97720c72a1003b942cda2ae)) +* **internal:** remove unused type var ([#915](https://github.com/openai/openai-python/issues/915)) ([4233bcd](https://github.com/openai/openai-python/commit/4233bcdae5f467f10454fcc008a6e728fa846830)) +* **internal:** replace string concatenation with f-strings ([#908](https://github.com/openai/openai-python/issues/908)) ([663a8f6](https://github.com/openai/openai-python/commit/663a8f6dead5aa523d1e8779e75af1dabb1690c4)) +* **internal:** replace string concatenation with f-strings ([#909](https://github.com/openai/openai-python/issues/909)) ([caab767](https://github.com/openai/openai-python/commit/caab767156375114078cf8d85031863361326b5f)) + + +### Documentation + +* fix typo in readme ([#904](https://github.com/openai/openai-python/issues/904)) ([472cd44](https://github.com/openai/openai-python/commit/472cd44e45a45b0b4f12583a5402e8aeb121d7a2)) +* **readme:** update example snippets ([#907](https://github.com/openai/openai-python/issues/907)) ([bbb648e](https://github.com/openai/openai-python/commit/bbb648ef81eb11f81b457e2cbf33a832f4d29a76)) + +## 1.3.6 (2023-11-28) + +Full Changelog: [v1.3.5...v1.3.6](https://github.com/openai/openai-python/compare/v1.3.5...v1.3.6) + +### Bug Fixes + +* **client:** add support for streaming binary responses ([#866](https://github.com/openai/openai-python/issues/866)) ([2470d25](https://github.com/openai/openai-python/commit/2470d251b751e92e8950bc9e3026965e9925ac1c)) + + +### Chores + +* **deps:** bump mypy to v1.7.1 ([#891](https://github.com/openai/openai-python/issues/891)) ([11fcb2a](https://github.com/openai/openai-python/commit/11fcb2a3cd4205b307c13c65ad47d9e315b0084d)) +* **internal:** send more detailed x-stainless headers ([#877](https://github.com/openai/openai-python/issues/877)) ([69e0549](https://github.com/openai/openai-python/commit/69e054947d587ff2548b101ece690d21d3c38f74)) +* revert binary streaming change ([#875](https://github.com/openai/openai-python/issues/875)) ([0a06d6a](https://github.com/openai/openai-python/commit/0a06d6a078c5ee898dae75bab4988e1a1936bfbf)) + + +### Documentation + +* **readme:** minor updates ([#894](https://github.com/openai/openai-python/issues/894)) ([5458457](https://github.com/openai/openai-python/commit/54584572df4c2a086172d812c6acb84e3405328b)) +* **readme:** update examples ([#893](https://github.com/openai/openai-python/issues/893)) ([124da87](https://github.com/openai/openai-python/commit/124da8720c44d40c083d29179f46a265761c1f4f)) +* update readme code snippet ([#890](https://github.com/openai/openai-python/issues/890)) 
([c522f21](https://github.com/openai/openai-python/commit/c522f21e2a685454185d57e462e74a28499460f9)) + ## 1.3.5 (2023-11-21) Full Changelog: [v1.3.4...v1.3.5](https://github.com/openai/openai-python/compare/v1.3.4...v1.3.5) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..52c2eb213a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,129 @@ +## Setting up the environment + +### With Rye + +We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. To set it up, run: + +```sh +$ ./scripts/bootstrap +``` + +Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run: + +```sh +$ rye sync --all-features +``` + +You can then run scripts using `rye run python script.py` or by activating the virtual environment: + +```sh +$ rye shell +# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work +$ source .venv/bin/activate + +# now you can omit the `rye run` prefix +$ python script.py +``` + +### Without Rye + +Alternatively if you don't want to install `Rye`, you can stick with the standard `pip` setup by ensuring you have the Python version specified in `.python-version`, create a virtual environment however you desire and then install dependencies using this command: + +```sh +$ pip install -r requirements-dev.lock +``` + +## Modifying/Adding code + +Most of the SDK is generated code. Modifications to code will be persisted between generations, but may +result in merge conflicts between manual patches and changes from the generator. The generator will never +modify the contents of the `src/openai/lib/` and `examples/` directories. + +## Adding and running examples + +All files in the `examples/` directory are not modified by the generator and can be freely edited or added to. + +```py +# add an example to examples/.py + +#!/usr/bin/env -S rye run python +… +``` + +```sh +$ chmod +x examples/.py +# run the example against your api +$ ./examples/.py +``` + +## Using the repository from source + +If you’d like to use the repository from source, you can either install from git or link to a cloned repository: + +To install via git: + +```sh +$ pip install git+ssh://git@github.com/openai/openai-python.git +``` + +Alternatively, you can build from source and install the wheel file: + +Building this package will create two files in the `dist/` directory, a `.tar.gz` containing the source files and a `.whl` that can be used to install the package efficiently. + +To create a distributable version of the library, all you have to do is run this command: + +```sh +$ rye build +# or +$ python -m build +``` + +Then to install: + +```sh +$ pip install ./path-to-wheel-file.whl +``` + +## Running tests + +Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. + +```sh +# you will need npm installed +$ npx prism mock path/to/your/openapi.yml +``` + +```sh +$ ./scripts/test +``` + +## Linting and formatting + +This repository uses [ruff](https://github.com/astral-sh/ruff) and +[black](https://github.com/psf/black) to format the code in the repository. + +To lint: + +```sh +$ ./scripts/lint +``` + +To format and fix all ruff issues automatically: + +```sh +$ ./scripts/format +``` + +## Publishing and releases + +Changes made to this repository via the automated release PR pipeline should publish to PyPI automatically. 
If +the changes aren't made through the automated pipeline, you may want to make releases manually. + +### Publish with a GitHub workflow + +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/openai/openai-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. + +### Publish manually + +If you need to manually release a package, you can run the `bin/publish-pypi` script with a `PYPI_TOKEN` set on +the environment. diff --git a/LICENSE b/LICENSE index 7b1b36a644..f011417af6 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2023 OpenAI + Copyright 2025 OpenAI Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index d916d3d0ea..f7e0eb6467 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI version](https://img.shields.io/pypi/v/openai.svg)](https://pypi.org/project/openai/) -The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.7+ +The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.8+ application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). @@ -10,69 +10,134 @@ It is generated from our [OpenAPI specification](https://github.com/openai/opena ## Documentation -The API documentation can be found [here](https://platform.openai.com/docs). +The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs/api-reference). The full API of this library can be found in [api.md](api.md). ## Installation -> [!IMPORTANT] -> The SDK was rewritten in v1, which was released November 6th 2023. See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code. - ```sh +# install from PyPI pip install openai ``` ## Usage -The full API of this library can be found in [api.md](https://www.github.com/openai/openai-python/blob/main/api.md). +The full API of this library can be found in [api.md](api.md). + +The primary API for interacting with OpenAI models is the [Responses API](https://platform.openai.com/docs/api-reference/responses). You can generate text from the model with the code below. ```python +import os from openai import OpenAI client = OpenAI( - # defaults to os.environ.get("OPENAI_API_KEY") - api_key="My API Key", + # This is the default and can be omitted + api_key=os.environ.get("OPENAI_API_KEY"), +) + +response = client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", ) -chat_completion = client.chat.completions.create( +print(response.output_text) +``` + +The previous standard (supported indefinitely) for generating text is the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat). You can use that API to generate text from the model with the code below. 
+ +```python +from openai import OpenAI + +client = OpenAI() + +completion = client.chat.completions.create( + model="gpt-4o", messages=[ + {"role": "developer", "content": "Talk like a pirate."}, { "role": "user", - "content": "Say this is a test", - } + "content": "How do I check if a Python object is an instance of a class?", + }, ], - model="gpt-3.5-turbo", ) + +print(completion.choices[0].message.content) ``` While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `OPENAI_API_KEY="My API Key"` to your `.env` file -so that your API Key is not stored in source control. +so that your API key is not stored in source control. +[Get an API key here](https://platform.openai.com/settings/organization/api-keys). + +### Vision + +With an image URL: + +```python +prompt = "What is in this image?" +img_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/2023_06_08_Raccoon1.jpg/1599px-2023_06_08_Raccoon1.jpg" + +response = client.responses.create( + model="gpt-4o-mini", + input=[ + { + "role": "user", + "content": [ + {"type": "input_text", "text": prompt}, + {"type": "input_image", "image_url": f"{img_url}"}, + ], + } + ], +) +``` + +With the image as a base64 encoded string: + +```python +import base64 +from openai import OpenAI + +client = OpenAI() + +prompt = "What is in this image?" +with open("path/to/image.png", "rb") as image_file: + b64_image = base64.b64encode(image_file.read()).decode("utf-8") + +response = client.responses.create( + model="gpt-4o-mini", + input=[ + { + "role": "user", + "content": [ + {"type": "input_text", "text": prompt}, + {"type": "input_image", "image_url": f"data:image/png;base64,{b64_image}"}, + ], + } + ], +) +``` ## Async usage Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call: ```python +import os import asyncio from openai import AsyncOpenAI client = AsyncOpenAI( - # defaults to os.environ.get("OPENAI_API_KEY") - api_key="My API Key", + # This is the default and can be omitted + api_key=os.environ.get("OPENAI_API_KEY"), ) async def main() -> None: - chat_completion = await client.chat.completions.create( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", + response = await client.responses.create( + model="gpt-4o", input="Explain disestablishmentarianism to a smart five year old." ) + print(response.output_text) asyncio.run(main()) @@ -80,7 +145,7 @@ asyncio.run(main()) Functionality between the synchronous and asynchronous clients is otherwise identical. -## Streaming Responses +## Streaming responses We provide support for streaming responses using Server Side Events (SSE). @@ -89,77 +154,106 @@ from openai import OpenAI client = OpenAI() -stream = client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], +stream = client.responses.create( + model="gpt-4o", + input="Write a one-sentence bedtime story about a unicorn.", stream=True, ) -for part in stream: - print(part.choices[0].delta.content or "") + +for event in stream: + print(event) ``` The async client uses the exact same interface. 
 ```python
+import asyncio
 from openai import AsyncOpenAI
 
 client = AsyncOpenAI()
 
-stream = await client.chat.completions.create(
-    prompt="Say this is a test",
-    messages=[{"role": "user", "content": "Say this is a test"}],
-    stream=True,
-)
-async for part in stream:
-    print(part.choices[0].delta.content or "")
+
+async def main():
+    stream = await client.responses.create(
+        model="gpt-4o",
+        input="Write a one-sentence bedtime story about a unicorn.",
+        stream=True,
+    )
+
+    async for event in stream:
+        print(event)
+
+
+asyncio.run(main())
 ```
 
-## Module-level client
+## Realtime API beta
 
-> [!IMPORTANT]
-> We highly recommend instantiating client instances instead of relying on the global client.
+The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a WebSocket connection.
+
+Under the hood, the SDK uses the [`websockets`](https://websockets.readthedocs.io/en/stable/) library to manage connections.
 
-We also expose a global client instance that is accessible in a similar fashion to versions prior to v1.
+The Realtime API works through a combination of client-sent events and server-sent events. Clients can send events to do things like update session configuration or send text and audio inputs. Server events confirm when audio responses have completed, or when a text response from the model has been received. A full event reference can be found [here](https://platform.openai.com/docs/api-reference/realtime-client-events) and a guide can be found [here](https://platform.openai.com/docs/guides/realtime).
+
+Basic text-based example:
 
 ```py
-import openai
+import asyncio
+from openai import AsyncOpenAI
 
-# optional; defaults to `os.environ['OPENAI_API_KEY']`
-openai.api_key = '...'
+async def main():
+    client = AsyncOpenAI()
 
-# all client options can be configured just like the `OpenAI` instantiation counterpart
-openai.base_url = "https://.../"
-openai.default_headers = {"x-foo": "true"}
+    async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+        await connection.session.update(session={'modalities': ['text']})
 
-completion = openai.chat.completions.create(
-    model="gpt-4",
-    messages=[
-        {
-            "role": "user",
-            "content": "How do I output all files in a directory using Python?",
-        },
-    ],
-)
-print(completion.choices[0].message.content)
+        await connection.conversation.item.create(
+            item={
+                "type": "message",
+                "role": "user",
+                "content": [{"type": "input_text", "text": "Say hello!"}],
+            }
+        )
+        await connection.response.create()
+
+        async for event in connection:
+            if event.type == 'response.text.delta':
+                print(event.delta, flush=True, end="")
+
+            elif event.type == 'response.text.done':
+                print()
+
+            elif event.type == "response.done":
+                break
+
+asyncio.run(main())
 ```
 
-The API is the exact same as the standard client instance based API.
+However, the real magic of the Realtime API is handling audio inputs / outputs; see this [TUI script](https://github.com/openai/openai-python/blob/main/examples/realtime/push_to_talk_app.py) for a fully fledged example, or the smaller audio sketch below.
 
-This is intended to be used within REPLs or notebooks for faster iteration, **not** in application code.
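+
+The same connection can also stream audio back. The sketch below is a minimal variant of the text example above, assuming the `response.audio.delta` server event carries base64-encoded PCM16 audio in its `delta` field (see the event reference linked above); it simply decodes each chunk and appends it to a raw audio file.
+
+```py
+import asyncio
+import base64
+from openai import AsyncOpenAI
+
+
+async def main():
+    client = AsyncOpenAI()
+
+    async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
+        # Ask for audio output in addition to text.
+        await connection.session.update(session={'modalities': ['text', 'audio']})
+
+        await connection.conversation.item.create(
+            item={
+                "type": "message",
+                "role": "user",
+                "content": [{"type": "input_text", "text": "Say hello!"}],
+            }
+        )
+        await connection.response.create()
+
+        # Decode the streamed audio deltas (assumed event name) and write the raw bytes to disk.
+        with open("hello.pcm", "wb") as audio_file:
+            async for event in connection:
+                if event.type == "response.audio.delta":
+                    audio_file.write(base64.b64decode(event.delta))
+                elif event.type == "response.done":
+                    break
+
+
+asyncio.run(main())
+```
+
+The resulting file is raw PCM with no container, so play it back with a tool that matches the session's output audio format (PCM16 by default) rather than expecting a WAV or MP3 file.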
+### Realtime error handling -We recommend that you always instantiate a client (e.g., with `client = OpenAI()`) in application code because: +Whenever an error occurs, the Realtime API will send an [`error` event](https://platform.openai.com/docs/guides/realtime-model-capabilities#error-handling) and the connection will stay open and remain usable. This means you need to handle it yourself, as _no errors are raised directly_ by the SDK when an `error` event comes in. -- It can be difficult to reason about where client options are configured -- It's not possible to change certain client options without potentially causing race conditions -- It's harder to mock for testing purposes -- It's not possible to control cleanup of network connections +```py +client = AsyncOpenAI() + +async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection: + ... + async for event in connection: + if event.type == 'error': + print(event.error.type) + print(event.error.code) + print(event.error.event_id) + print(event.error.message) +``` ## Using types -Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev), which provide helper methods for things like: +Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: -- Serializing back into JSON, `model.model_dump_json(indent=2, exclude_unset=True)` -- Converting to a dictionary, `model.model_dump(exclude_unset=True)` +- Serializing back into JSON, `model.to_json()` +- Converting to a dictionary, `model.to_dict()` Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. @@ -170,7 +264,7 @@ List methods in the OpenAI API are paginated. This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually: ```python -import openai +from openai import OpenAI client = OpenAI() @@ -188,7 +282,7 @@ Or, asynchronously: ```python import asyncio -import openai +from openai import AsyncOpenAI client = AsyncOpenAI() @@ -243,21 +337,21 @@ from openai import OpenAI client = OpenAI() -completion = client.chat.completions.create( - messages=[ +response = client.chat.responses.create( + input=[ { "role": "user", - "content": "Can you generate an example json object describing a fruit?", + "content": "How much ?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", response_format={"type": "json_object"}, ) ``` -## File Uploads +## File uploads -Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. +Request parameters that correspond to file uploads can be passed as `bytes`, or a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. 
```python from pathlib import Path @@ -289,8 +383,9 @@ from openai import OpenAI client = OpenAI() try: - client.fine_tunes.create( - training_file="file-XGinujblHPwGLSztz8cPS8XY", + client.fine_tuning.jobs.create( + model="gpt-4o", + training_file="file-abc123", ) except openai.APIConnectionError as e: print("The server could not be reached") @@ -303,7 +398,7 @@ except openai.APIStatusError as e: print(e.response) ``` -Error codes are as followed: +Error codes are as follows: | Status Code | Error Type | | ----------- | -------------------------- | @@ -316,7 +411,40 @@ Error codes are as followed: | >=500 | `InternalServerError` | | N/A | `APIConnectionError` | -### Retries +## Request IDs + +> For more information on debugging requests, see [these docs](https://platform.openai.com/docs/api-reference/debugging-requests) + +All object responses in the SDK provide a `_request_id` property which is added from the `x-request-id` response header so that you can quickly log failing requests and report them back to OpenAI. + +```python +response = await client.responses.create( + model="gpt-4o-mini", + input="Say 'this is a test'.", +) +print(response._request_id) # req_123 +``` + +Note that unlike other properties that use an `_` prefix, the `_request_id` property +_is_ public. Unless documented otherwise, _all_ other `_` prefix properties, +methods and modules are _private_. + +> [!IMPORTANT] +> If you need to access request IDs for failed requests you must catch the `APIStatusError` exception + +```python +import openai + +try: + completion = await client.chat.completions.create( + messages=[{"role": "user", "content": "Say this is a test"}], model="gpt-4" + ) +except openai.APIStatusError as exc: + print(exc.request_id) # req_123 + raise exc +``` + +## Retries Certain errors are automatically retried 2 times by default, with a short exponential backoff. Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict, @@ -338,24 +466,24 @@ client.with_options(max_retries=5).chat.completions.create( messages=[ { "role": "user", - "content": "How can I get the name of the current day in Node.js?", + "content": "How can I get the name of the current day in JavaScript?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` -### Timeouts +## Timeouts By default requests time out after 10 minutes. You can configure this with a `timeout` option, -which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object: ```python from openai import OpenAI # Configure the default for all requests: client = OpenAI( - # default is 60s + # 20 seconds (default is 10 minutes) timeout=20.0, ) @@ -365,14 +493,14 @@ client = OpenAI( ) # Override per-request: -client.with_options(timeout=5 * 1000).chat.completions.create( +client.with_options(timeout=5.0).chat.completions.create( messages=[ { "role": "user", "content": "How can I list all files in a directory using Python?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -386,12 +514,14 @@ Note that requests that time out are [retried twice by default](#retries). We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. -You can enable logging by setting the environment variable `OPENAI_LOG` to `debug`. +You can enable logging by setting the environment variable `OPENAI_LOG` to `info`. 
```shell -$ export OPENAI_LOG=debug +$ export OPENAI_LOG=info ``` +Or to `debug` for more verbose logging. + ### How to tell whether `None` means `null` or missing In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`: @@ -406,7 +536,7 @@ if response.my_field is None: ### Accessing raw response data (e.g. headers) -The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call. +The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g., ```py from openai import OpenAI @@ -417,7 +547,7 @@ response = client.chat.completions.with_raw_response.create( "role": "user", "content": "Say this is a test", }], - model="gpt-3.5-turbo", + model="gpt-4o", ) print(response.headers.get('X-My-Header')) @@ -425,37 +555,120 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion) ``` -These methods return an [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object. +These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. + +For the sync client this will mostly be the same with the exception +of `content` & `text` will be methods instead of properties. In the +async client, all methods will be async. + +A migration script will be provided & the migration in general should +be smooth. + +#### `.with_streaming_response` + +The above interface eagerly reads the full response body when you make the request, which may not always be what you want. + +To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. + +As such, `.with_streaming_response` methods return a different [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object, and the async client returns an [`AsyncAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object. + +```python +with client.chat.completions.with_streaming_response.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", +) as response: + print(response.headers.get("X-My-Header")) + + for line in response.iter_lines(): + print(line) +``` + +The context manager is required so that the response will reliably be closed. + +### Making custom/undocumented requests + +This library is typed for convenient access to the documented API. + +If you need to access undocumented endpoints, params, or response properties, the library can still be used. + +#### Undocumented endpoints + +To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other +http verbs. Options on the client will be respected (such as retries) when making this request. 
+ +```py +import httpx + +response = client.post( + "/foo", + cast_to=httpx.Response, + body={"my_param": True}, +) + +print(response.headers.get("x-foo")) +``` + +#### Undocumented request params + +If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` request +options. + +#### Undocumented response properties + +To access undocumented response properties, you can access the extra fields like `response.unknown_prop`. You +can also get all the extra fields on the Pydantic model as a dict with +[`response.model_extra`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_extra). ### Configuring the HTTP client You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for proxies -- Custom transports -- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python import httpx -from openai import OpenAI +from openai import OpenAI, DefaultHttpxClient client = OpenAI( # Or use the `OPENAI_BASE_URL` env var - base_url="http://my.test.server.example.com:8083/", - http_client=httpx.Client( - proxies="http://my.test.proxy.example.com/", + base_url="http://my.test.server.example.com:8083/v1", + http_client=DefaultHttpxClient( + proxy="http://my.test.proxy.example.com/", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) ``` +You can also customize the client on a per-request basis by using `with_options()`: + +```python +client.with_options(http_client=DefaultHttpxClient(...)) +``` + ### Managing HTTP resources By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. +```py +from openai import OpenAI + +with OpenAI() as client: + # make requests here + ... + +# HTTP client is now closed +``` + ## Microsoft Azure OpenAI -To use this library with [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview), use the `AzureOpenAI` +To use this library with [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/overview), use the `AzureOpenAI` class instead of the `OpenAI` class. 
> [!IMPORTANT] @@ -467,9 +680,9 @@ from openai import AzureOpenAI # gets the API Key from environment variable AZURE_OPENAI_API_KEY client = AzureOpenAI( - # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning - api_version="2023-07-01-preview" - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource + # https://learn.microsoft.com/azure/ai-services/openai/reference#rest-api-versioning + api_version="2023-07-01-preview", + # https://learn.microsoft.com/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource azure_endpoint="https://example-endpoint.openai.azure.com/", ) @@ -482,7 +695,7 @@ completion = client.chat.completions.create( }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) ``` In addition to the options provided in the base `OpenAI` client, the following options are provided: @@ -493,20 +706,35 @@ In addition to the options provided in the base `OpenAI` client, the following o - `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` environment variable) - `azure_ad_token_provider` -An example of using the client with Azure Active Directory can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py). +An example of using the client with Microsoft Entra ID (formerly known as Azure Active Directory) can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py). ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: 1. Changes that only affect static types, without breaking runtime behavior. -2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ 3. Changes that we do not expect to impact the vast majority of users in practice. We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions. +### Determining the installed version + +If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version. + +You can determine the version that is being used at runtime with: + +```py +import openai +print(openai.__version__) +``` + ## Requirements -Python 3.7 or higher. +Python 3.8 or higher. + +## Contributing + +See [the contributing documentation](./CONTRIBUTING.md). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..3b3bd8a662 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,29 @@ +# Security Policy + +## Reporting Security Issues + +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. + +To report a security issue, please contact the Stainless team at security@stainless.com. 
+ +## Responsible Disclosure + +We appreciate the efforts of security researchers and individuals who help us maintain the security of +SDKs we generate. If you believe you have found a security vulnerability, please adhere to responsible +disclosure practices by allowing us a reasonable amount of time to investigate and address the issue +before making any information public. + +## Reporting Non-SDK Related Security Issues + +If you encounter security issues that are not directly related to SDKs but pertain to the services +or products provided by OpenAI please follow the respective company's security reporting guidelines. + +### OpenAI Terms and Policies + +Our Security Policy can be found at [Security Policy URL](https://openai.com/policies/coordinated-vulnerability-disclosure-policy). + +Please contact disclosure@openai.com for any questions or concerns regarding security of our services. + +--- + +Thank you for helping us keep the SDKs and systems they interact with secure. diff --git a/api.md b/api.md index a7ee177411..496e5548b3 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,22 @@ # Shared Types ```python -from openai.types import FunctionDefinition, FunctionParameters +from openai.types import ( + AllModels, + ChatModel, + ComparisonFilter, + CompoundFilter, + ErrorObject, + FunctionDefinition, + FunctionParameters, + Metadata, + Reasoning, + ReasoningEffort, + ResponseFormatJSONObject, + ResponseFormatJSONSchema, + ResponseFormatText, + ResponsesModel, +) ``` # Completions @@ -18,6 +33,12 @@ Methods: # Chat +Types: + +```python +from openai.types import ChatModel +``` + ## Completions Types: @@ -26,47 +47,57 @@ Types: from openai.types.chat import ( ChatCompletion, ChatCompletionAssistantMessageParam, + ChatCompletionAudio, + ChatCompletionAudioParam, ChatCompletionChunk, ChatCompletionContentPart, ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, + ChatCompletionContentPartRefusal, ChatCompletionContentPartText, + ChatCompletionDeleted, + ChatCompletionDeveloperMessageParam, ChatCompletionFunctionCallOption, ChatCompletionFunctionMessageParam, ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionMessageToolCall, + ChatCompletionModality, ChatCompletionNamedToolChoice, + ChatCompletionPredictionContent, ChatCompletionRole, + ChatCompletionStoreMessage, + ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, + ChatCompletionTokenLogprob, ChatCompletionTool, ChatCompletionToolChoiceOption, ChatCompletionToolMessageParam, ChatCompletionUserMessageParam, + ChatCompletionReasoningEffort, ) ``` Methods: -- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.retrieve(completion_id) -> ChatCompletion +- client.chat.completions.update(completion_id, \*\*params) -> ChatCompletion +- client.chat.completions.list(\*\*params) -> SyncCursorPage[ChatCompletion] +- client.chat.completions.delete(completion_id) -> ChatCompletionDeleted -# Edits - -Types: - -```python -from openai.types import Edit -``` +### Messages Methods: -- client.edits.create(\*\*params) -> Edit +- client.chat.completions.messages.list(completion_id, \*\*params) -> SyncCursorPage[ChatCompletionStoreMessage] # Embeddings Types: ```python -from openai.types import CreateEmbeddingResponse, Embedding +from openai.types import CreateEmbeddingResponse, Embedding, EmbeddingModel ``` Methods: @@ -78,17 +109,17 @@ Methods: Types: ```python -from openai.types import FileContent, FileDeleted, 
FileObject +from openai.types import FileContent, FileDeleted, FileObject, FilePurpose ``` Methods: - client.files.create(\*\*params) -> FileObject - client.files.retrieve(file_id) -> FileObject -- client.files.list(\*\*params) -> SyncPage[FileObject] +- client.files.list(\*\*params) -> SyncCursorPage[FileObject] - client.files.delete(file_id) -> FileDeleted - client.files.content(file_id) -> HttpxBinaryResponseContent -- client.files.retrieve_content(file_id) -> str +- client.files.retrieve_content(file_id) -> str - client.files.wait_for_processing(\*args) -> FileObject # Images @@ -96,7 +127,7 @@ Methods: Types: ```python -from openai.types import Image, ImagesResponse +from openai.types import Image, ImageModel, ImagesResponse ``` Methods: @@ -107,32 +138,54 @@ Methods: # Audio +Types: + +```python +from openai.types import AudioModel, AudioResponseFormat +``` + ## Transcriptions Types: ```python -from openai.types.audio import Transcription +from openai.types.audio import ( + Transcription, + TranscriptionInclude, + TranscriptionSegment, + TranscriptionStreamEvent, + TranscriptionTextDeltaEvent, + TranscriptionTextDoneEvent, + TranscriptionVerbose, + TranscriptionWord, + TranscriptionCreateResponse, +) ``` Methods: -- client.audio.transcriptions.create(\*\*params) -> Transcription +- client.audio.transcriptions.create(\*\*params) -> TranscriptionCreateResponse ## Translations Types: ```python -from openai.types.audio import Translation +from openai.types.audio import Translation, TranslationVerbose, TranslationCreateResponse ``` Methods: -- client.audio.translations.create(\*\*params) -> Translation +- client.audio.translations.create(\*\*params) -> TranslationCreateResponse ## Speech +Types: + +```python +from openai.types.audio import SpeechModel +``` + Methods: - client.audio.speech.create(\*\*params) -> HttpxBinaryResponseContent @@ -142,7 +195,14 @@ Methods: Types: ```python -from openai.types import Moderation, ModerationCreateResponse +from openai.types import ( + Moderation, + ModerationImageURLInput, + ModerationModel, + ModerationMultiModalInput, + ModerationTextInput, + ModerationCreateResponse, +) ``` Methods: @@ -165,77 +225,301 @@ Methods: # FineTuning +## Methods + +Types: + +```python +from openai.types.fine_tuning import ( + DpoHyperparameters, + DpoMethod, + ReinforcementHyperparameters, + ReinforcementMethod, + SupervisedHyperparameters, + SupervisedMethod, +) +``` + ## Jobs Types: ```python -from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent +from openai.types.fine_tuning import ( + FineTuningJob, + FineTuningJobEvent, + FineTuningJobWandbIntegration, + FineTuningJobWandbIntegrationObject, + FineTuningJobIntegration, +) +``` + +Methods: + +- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob +- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob] +- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent] +- client.fine_tuning.jobs.pause(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.resume(fine_tuning_job_id) -> FineTuningJob + +### Checkpoints + +Types: + +```python +from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint +``` + +Methods: + +- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint] + +## Checkpoints + +### Permissions + 
+Types: + +```python +from openai.types.fine_tuning.checkpoints import ( + PermissionCreateResponse, + PermissionRetrieveResponse, + PermissionDeleteResponse, +) +``` + +Methods: + +- client.fine_tuning.checkpoints.permissions.create(fine_tuned_model_checkpoint, \*\*params) -> SyncPage[PermissionCreateResponse] +- client.fine_tuning.checkpoints.permissions.retrieve(fine_tuned_model_checkpoint, \*\*params) -> PermissionRetrieveResponse +- client.fine_tuning.checkpoints.permissions.delete(permission_id, \*, fine_tuned_model_checkpoint) -> PermissionDeleteResponse + +## Alpha + +### Graders + +Types: + +```python +from openai.types.fine_tuning.alpha import GraderRunResponse, GraderValidateResponse +``` + +Methods: + +- client.fine_tuning.alpha.graders.run(\*\*params) -> GraderRunResponse +- client.fine_tuning.alpha.graders.validate(\*\*params) -> GraderValidateResponse + +# Graders + +## GraderModels + +Types: + +```python +from openai.types.graders import ( + LabelModelGrader, + MultiGrader, + PythonGrader, + ScoreModelGrader, + StringCheckGrader, + TextSimilarityGrader, +) +``` + +# VectorStores + +Types: + +```python +from openai.types import ( + AutoFileChunkingStrategyParam, + FileChunkingStrategy, + FileChunkingStrategyParam, + OtherFileChunkingStrategyObject, + StaticFileChunkingStrategy, + StaticFileChunkingStrategyObject, + StaticFileChunkingStrategyObjectParam, + VectorStore, + VectorStoreDeleted, + VectorStoreSearchResponse, +) ``` Methods: -- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob -- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob -- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob] -- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob -- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent] +- client.vector_stores.create(\*\*params) -> VectorStore +- client.vector_stores.retrieve(vector_store_id) -> VectorStore +- client.vector_stores.update(vector_store_id, \*\*params) -> VectorStore +- client.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] +- client.vector_stores.delete(vector_store_id) -> VectorStoreDeleted +- client.vector_stores.search(vector_store_id, \*\*params) -> SyncPage[VectorStoreSearchResponse] -# FineTunes +## Files Types: ```python -from openai.types import FineTune, FineTuneEvent, FineTuneEventsListResponse +from openai.types.vector_stores import VectorStoreFile, VectorStoreFileDeleted, FileContentResponse ``` Methods: -- client.fine_tunes.create(\*\*params) -> FineTune -- client.fine_tunes.retrieve(fine_tune_id) -> FineTune -- client.fine_tunes.list() -> SyncPage[FineTune] -- client.fine_tunes.cancel(fine_tune_id) -> FineTune -- client.fine_tunes.list_events(fine_tune_id, \*\*params) -> FineTuneEventsListResponse +- client.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile +- client.vector_stores.files.update(file_id, \*, vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted +- client.vector_stores.files.content(file_id, \*, vector_store_id) -> SyncPage[FileContentResponse] +- client.vector_stores.files.create_and_poll(\*args) -> VectorStoreFile +- client.vector_stores.files.poll(\*args) -> VectorStoreFile +- 
client.vector_stores.files.upload(\*args) -> VectorStoreFile +- client.vector_stores.files.upload_and_poll(\*args) -> VectorStoreFile + +## FileBatches + +Types: + +```python +from openai.types.vector_stores import VectorStoreFileBatch +``` + +Methods: + +- client.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch +- client.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.vector_stores.file_batches.create_and_poll(\*args) -> VectorStoreFileBatch +- client.vector_stores.file_batches.poll(\*args) -> VectorStoreFileBatch +- client.vector_stores.file_batches.upload_and_poll(\*args) -> VectorStoreFileBatch # Beta -## Assistants +## Realtime + +Types: + +```python +from openai.types.beta.realtime import ( + ConversationCreatedEvent, + ConversationItem, + ConversationItemContent, + ConversationItemCreateEvent, + ConversationItemCreatedEvent, + ConversationItemDeleteEvent, + ConversationItemDeletedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionDeltaEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemRetrieveEvent, + ConversationItemTruncateEvent, + ConversationItemTruncatedEvent, + ConversationItemWithReference, + ErrorEvent, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommitEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + RateLimitsUpdatedEvent, + RealtimeClientEvent, + RealtimeResponse, + RealtimeResponseStatus, + RealtimeResponseUsage, + RealtimeServerEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCancelEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreateEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdateEvent, + SessionUpdatedEvent, + TranscriptionSessionUpdate, + TranscriptionSessionUpdatedEvent, +) +``` + +### Sessions + +Types: + +```python +from openai.types.beta.realtime import Session, SessionCreateResponse +``` + +Methods: + +- client.beta.realtime.sessions.create(\*\*params) -> SessionCreateResponse + +### TranscriptionSessions Types: ```python -from openai.types.beta import Assistant, AssistantDeleted +from openai.types.beta.realtime import TranscriptionSession ``` Methods: -- client.beta.assistants.create(\*\*params) -> Assistant -- client.beta.assistants.retrieve(assistant_id) -> Assistant -- client.beta.assistants.update(assistant_id, \*\*params) -> Assistant -- client.beta.assistants.list(\*\*params) -> SyncCursorPage[Assistant] -- client.beta.assistants.delete(assistant_id) -> AssistantDeleted +- client.beta.realtime.transcription_sessions.create(\*\*params) -> TranscriptionSession -### Files +## Assistants Types: ```python -from openai.types.beta.assistants import AssistantFile, FileDeleteResponse +from openai.types.beta import ( + Assistant, + AssistantDeleted, + AssistantStreamEvent, + 
AssistantTool, + CodeInterpreterTool, + FileSearchTool, + FunctionTool, + MessageStreamEvent, + RunStepStreamEvent, + RunStreamEvent, + ThreadStreamEvent, +) ``` Methods: -- client.beta.assistants.files.create(assistant_id, \*\*params) -> AssistantFile -- client.beta.assistants.files.retrieve(file_id, \*, assistant_id) -> AssistantFile -- client.beta.assistants.files.list(assistant_id, \*\*params) -> SyncCursorPage[AssistantFile] -- client.beta.assistants.files.delete(file_id, \*, assistant_id) -> FileDeleteResponse +- client.beta.assistants.create(\*\*params) -> Assistant +- client.beta.assistants.retrieve(assistant_id) -> Assistant +- client.beta.assistants.update(assistant_id, \*\*params) -> Assistant +- client.beta.assistants.list(\*\*params) -> SyncCursorPage[Assistant] +- client.beta.assistants.delete(assistant_id) -> AssistantDeleted ## Threads Types: ```python -from openai.types.beta import Thread, ThreadDeleted +from openai.types.beta import ( + AssistantResponseFormatOption, + AssistantToolChoice, + AssistantToolChoiceFunction, + AssistantToolChoiceOption, + Thread, + ThreadDeleted, +) ``` Methods: @@ -245,13 +529,15 @@ Methods: - client.beta.threads.update(thread_id, \*\*params) -> Thread - client.beta.threads.delete(thread_id) -> ThreadDeleted - client.beta.threads.create_and_run(\*\*params) -> Run +- client.beta.threads.create_and_run_poll(\*args) -> Run +- client.beta.threads.create_and_run_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] ### Runs Types: ```python -from openai.types.beta.threads import RequiredActionFunctionToolCall, Run +from openai.types.beta.threads import RequiredActionFunctionToolCall, Run, RunStatus ``` Methods: @@ -262,6 +548,12 @@ Methods: - client.beta.threads.runs.list(thread_id, \*\*params) -> SyncCursorPage[Run] - client.beta.threads.runs.cancel(run_id, \*, thread_id) -> Run - client.beta.threads.runs.submit_tool_outputs(run_id, \*, thread_id, \*\*params) -> Run +- client.beta.threads.runs.create_and_poll(\*args) -> Run +- client.beta.threads.runs.create_and_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] +- client.beta.threads.runs.poll(\*args) -> Run +- client.beta.threads.runs.stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] +- client.beta.threads.runs.submit_tool_outputs_and_poll(\*args) -> Run +- client.beta.threads.runs.submit_tool_outputs_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] #### Steps @@ -269,18 +561,30 @@ Types: ```python from openai.types.beta.threads.runs import ( - CodeToolCall, + CodeInterpreterLogs, + CodeInterpreterOutputImage, + CodeInterpreterToolCall, + CodeInterpreterToolCallDelta, + FileSearchToolCall, + FileSearchToolCallDelta, FunctionToolCall, + FunctionToolCallDelta, MessageCreationStepDetails, - RetrievalToolCall, RunStep, + RunStepDelta, + RunStepDeltaEvent, + RunStepDeltaMessageDelta, + RunStepInclude, + ToolCall, + ToolCallDelta, + ToolCallDeltaObject, ToolCallsStepDetails, ) ``` Methods: -- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id) -> RunStep +- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id, \*\*params) -> RunStep - client.beta.threads.runs.steps.list(run_id, \*, thread_id, \*\*params) -> SyncCursorPage[RunStep] ### Messages @@ -289,29 +593,251 @@ Types: ```python from openai.types.beta.threads import 
( - MessageContentImageFile, - MessageContentText, - ThreadMessage, - ThreadMessageDeleted, + Annotation, + AnnotationDelta, + FileCitationAnnotation, + FileCitationDeltaAnnotation, + FilePathAnnotation, + FilePathDeltaAnnotation, + ImageFile, + ImageFileContentBlock, + ImageFileDelta, + ImageFileDeltaBlock, + ImageURL, + ImageURLContentBlock, + ImageURLDelta, + ImageURLDeltaBlock, + Message, + MessageContent, + MessageContentDelta, + MessageContentPartParam, + MessageDeleted, + MessageDelta, + MessageDeltaEvent, + RefusalContentBlock, + RefusalDeltaBlock, + Text, + TextContentBlock, + TextContentBlockParam, + TextDelta, + TextDeltaBlock, +) +``` + +Methods: + +- client.beta.threads.messages.create(thread_id, \*\*params) -> Message +- client.beta.threads.messages.retrieve(message_id, \*, thread_id) -> Message +- client.beta.threads.messages.update(message_id, \*, thread_id, \*\*params) -> Message +- client.beta.threads.messages.list(thread_id, \*\*params) -> SyncCursorPage[Message] +- client.beta.threads.messages.delete(message_id, \*, thread_id) -> MessageDeleted + +# Batches + +Types: + +```python +from openai.types import Batch, BatchError, BatchRequestCounts +``` + +Methods: + +- client.batches.create(\*\*params) -> Batch +- client.batches.retrieve(batch_id) -> Batch +- client.batches.list(\*\*params) -> SyncCursorPage[Batch] +- client.batches.cancel(batch_id) -> Batch + +# Uploads + +Types: + +```python +from openai.types import Upload +``` + +Methods: + +- client.uploads.create(\*\*params) -> Upload +- client.uploads.cancel(upload_id) -> Upload +- client.uploads.complete(upload_id, \*\*params) -> Upload + +## Parts + +Types: + +```python +from openai.types.uploads import UploadPart +``` + +Methods: + +- client.uploads.parts.create(upload_id, \*\*params) -> UploadPart + +# Responses + +Types: + +```python +from openai.types.responses import ( + ComputerTool, + EasyInputMessage, + FileSearchTool, + FunctionTool, + Response, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterToolCall, + ResponseCompletedEvent, + ResponseComputerToolCall, + ResponseComputerToolCallOutputItem, + ResponseComputerToolCallOutputScreenshot, + ResponseContent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseError, + ResponseErrorEvent, + ResponseFailedEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFileSearchToolCall, + ResponseFormatTextConfig, + ResponseFormatTextJSONSchemaConfig, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseFunctionToolCall, + ResponseFunctionToolCallItem, + ResponseFunctionToolCallOutputItem, + ResponseFunctionWebSearch, + ResponseInProgressEvent, + ResponseIncludable, + ResponseIncompleteEvent, + ResponseInput, + ResponseInputAudio, + ResponseInputContent, + ResponseInputFile, + ResponseInputImage, + ResponseInputItem, + ResponseInputMessageContentList, + ResponseInputMessageItem, + ResponseInputText, + ResponseItem, + ResponseOutputAudio, + ResponseOutputItem, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseOutputMessage, + ResponseOutputRefusal, + 
ResponseOutputText, + ResponseReasoningItem, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseStatus, + ResponseStreamEvent, + ResponseTextAnnotationDeltaEvent, + ResponseTextConfig, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseUsage, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + Tool, + ToolChoiceFunction, + ToolChoiceOptions, + ToolChoiceTypes, + WebSearchTool, +) +``` + +Methods: + +- client.responses.create(\*\*params) -> Response +- client.responses.retrieve(response_id, \*\*params) -> Response +- client.responses.delete(response_id) -> None + +## InputItems + +Types: + +```python +from openai.types.responses import ResponseItemList +``` + +Methods: + +- client.responses.input_items.list(response_id, \*\*params) -> SyncCursorPage[ResponseItem] + +# Evals + +Types: + +```python +from openai.types import ( + EvalCustomDataSourceConfig, + EvalStoredCompletionsDataSourceConfig, + EvalCreateResponse, + EvalRetrieveResponse, + EvalUpdateResponse, + EvalListResponse, + EvalDeleteResponse, +) +``` + +Methods: + +- client.evals.create(\*\*params) -> EvalCreateResponse +- client.evals.retrieve(eval_id) -> EvalRetrieveResponse +- client.evals.update(eval_id, \*\*params) -> EvalUpdateResponse +- client.evals.list(\*\*params) -> SyncCursorPage[EvalListResponse] +- client.evals.delete(eval_id) -> EvalDeleteResponse + +## Runs + +Types: + +```python +from openai.types.evals import ( + CreateEvalCompletionsRunDataSource, + CreateEvalJSONLRunDataSource, + EvalAPIError, + RunCreateResponse, + RunRetrieveResponse, + RunListResponse, + RunDeleteResponse, + RunCancelResponse, ) ``` Methods: -- client.beta.threads.messages.create(thread_id, \*\*params) -> ThreadMessage -- client.beta.threads.messages.retrieve(message_id, \*, thread_id) -> ThreadMessage -- client.beta.threads.messages.update(message_id, \*, thread_id, \*\*params) -> ThreadMessage -- client.beta.threads.messages.list(thread_id, \*\*params) -> SyncCursorPage[ThreadMessage] +- client.evals.runs.create(eval_id, \*\*params) -> RunCreateResponse +- client.evals.runs.retrieve(run_id, \*, eval_id) -> RunRetrieveResponse +- client.evals.runs.list(eval_id, \*\*params) -> SyncCursorPage[RunListResponse] +- client.evals.runs.delete(run_id, \*, eval_id) -> RunDeleteResponse +- client.evals.runs.cancel(run_id, \*, eval_id) -> RunCancelResponse -#### Files +### OutputItems Types: ```python -from openai.types.beta.threads.messages import MessageFile +from openai.types.evals.runs import OutputItemRetrieveResponse, OutputItemListResponse ``` Methods: -- client.beta.threads.messages.files.retrieve(file_id, \*, thread_id, message_id) -> MessageFile -- client.beta.threads.messages.files.list(message_id, \*, thread_id, \*\*params) -> SyncCursorPage[MessageFile] +- client.evals.runs.output_items.retrieve(output_item_id, \*, eval_id, run_id) -> OutputItemRetrieveResponse +- client.evals.runs.output_items.list(run_id, \*, eval_id, \*\*params) -> SyncCursorPage[OutputItemListResponse] diff --git a/bin/check-release-environment b/bin/check-release-environment index b0c8d34f0c..5471b69edb 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -2,17 +2,13 @@ errors=() -if [ -z "${STAINLESS_API_KEY}" ]; then - errors+=("The STAINLESS_API_KEY secret has not been set. 
Please contact Stainless for an API key & set it in your organization secrets on GitHub.") -fi - if [ -z "${PYPI_TOKEN}" ]; then errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server deleted file mode 100755 index a6fa34950d..0000000000 --- a/bin/check-test-server +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -function prism_is_running() { - curl --silent "http://localhost:4010/" >/dev/null 2>&1 -} - -function is_overriding_api_base_url() { - [ -n "$TEST_API_BASE_URL" ] -} - -if is_overriding_api_base_url ; then - # If someone is running the tests against the live API, we can trust they know - # what they're doing and exit early. - echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" - - exit 0 -elif prism_is_running ; then - echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" - echo - - exit 0 -else - echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" - echo -e "running against your OpenAPI spec." - echo - echo -e "${YELLOW}To fix:${NC}" - echo - echo -e "1. Install Prism (requires Node 16+):" - echo - echo -e " With npm:" - echo -e " \$ ${YELLOW}npm install -g @stoplight/prism-cli${NC}" - echo - echo -e " With yarn:" - echo -e " \$ ${YELLOW}yarn global add @stoplight/prism-cli${NC}" - echo - echo -e "2. Run the mock server" - echo - echo -e " To run the server, pass in the path of your OpenAPI" - echo -e " spec to the prism command:" - echo - echo -e " \$ ${YELLOW}prism mock path/to/your.openapi.yml${NC}" - echo - - exit 1 -fi diff --git a/bin/test b/bin/test deleted file mode 100755 index 60ede7a842..0000000000 --- a/bin/test +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -bin/check-test-server && rye run pytest "$@" diff --git a/examples/.keep b/examples/.keep new file mode 100644 index 0000000000..d8c73e937a --- /dev/null +++ b/examples/.keep @@ -0,0 +1,4 @@ +File generated from our OpenAPI spec by Stainless. + +This directory can be used to store example files demonstrating usage of this SDK. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. \ No newline at end of file diff --git a/examples/assistant.py b/examples/assistant.py index c5fbb82a3a..f6924a0c7d 100644 --- a/examples/assistant.py +++ b/examples/assistant.py @@ -1,5 +1,3 @@ -import time - import openai # gets API Key from environment variable OPENAI_API_KEY @@ -20,28 +18,20 @@ content="I need to solve the equation `3x + 11 = 14`. Can you help me?", ) -run = client.beta.threads.runs.create( +run = client.beta.threads.runs.create_and_poll( thread_id=thread.id, assistant_id=assistant.id, instructions="Please address the user as Jane Doe. The user has a premium account.", ) -print("checking assistant status. 
") -while True: - run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id) - - if run.status == "completed": - print("done!") - messages = client.beta.threads.messages.list(thread_id=thread.id) +print("Run completed with status: " + run.status) - print("messages: ") - for message in messages: - assert message.content[0].type == "text" - print({"role": message.role, "message": message.content[0].text.value}) +if run.status == "completed": + messages = client.beta.threads.messages.list(thread_id=thread.id) - client.beta.assistants.delete(assistant.id) + print("messages: ") + for message in messages: + assert message.content[0].type == "text" + print({"role": message.role, "message": message.content[0].text.value}) - break - else: - print("in progress...") - time.sleep(5) + client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream.py b/examples/assistant_stream.py new file mode 100644 index 0000000000..0465d3930f --- /dev/null +++ b/examples/assistant_stream.py @@ -0,0 +1,33 @@ +import openai + +# gets API Key from environment variable OPENAI_API_KEY +client = openai.OpenAI() + +assistant = client.beta.assistants.create( + name="Math Tutor", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=[{"type": "code_interpreter"}], + model="gpt-4-1106-preview", +) + +thread = client.beta.threads.create() + +message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content="I need to solve the equation `3x + 11 = 14`. Can you help me?", +) + +print("starting run stream") + +stream = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", + stream=True, +) + +for event in stream: + print(event.model_dump_json(indent=2, exclude_unset=True)) + +client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream_helpers.py b/examples/assistant_stream_helpers.py new file mode 100644 index 0000000000..7baec77c72 --- /dev/null +++ b/examples/assistant_stream_helpers.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing_extensions import override + +import openai +from openai import AssistantEventHandler +from openai.types.beta import AssistantStreamEvent +from openai.types.beta.threads import Text, TextDelta +from openai.types.beta.threads.runs import RunStep, RunStepDelta + + +class EventHandler(AssistantEventHandler): + @override + def on_event(self, event: AssistantStreamEvent) -> None: + if event.event == "thread.run.step.created": + details = event.data.step_details + if details.type == "tool_calls": + print("Generating code to interpret:\n\n```py") + elif event.event == "thread.message.created": + print("\nResponse:\n") + + @override + def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: + print(delta.value, end="", flush=True) + + @override + def on_run_step_done(self, run_step: RunStep) -> None: + details = run_step.step_details + if details.type == "tool_calls": + for tool in details.tool_calls: + if tool.type == "code_interpreter": + print("\n```\nExecuting code...") + + @override + def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + details = delta.step_details + if details is not None and details.type == "tool_calls": + for tool in details.tool_calls or []: + if tool.type == "code_interpreter" and tool.code_interpreter and tool.code_interpreter.input: + print(tool.code_interpreter.input, 
end="", flush=True) + + +def main() -> None: + client = openai.OpenAI() + + assistant = client.beta.assistants.create( + name="Math Tutor", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=[{"type": "code_interpreter"}], + model="gpt-4-1106-preview", + ) + + try: + question = "I need to solve the equation `3x + 11 = 14`. Can you help me?" + + thread = client.beta.threads.create( + messages=[ + { + "role": "user", + "content": question, + }, + ] + ) + print(f"Question: {question}\n") + + with client.beta.threads.runs.stream( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account.", + event_handler=EventHandler(), + ) as stream: + stream.until_done() + print() + finally: + client.beta.assistants.delete(assistant.id) + + +main() diff --git a/examples/async_demo.py b/examples/async_demo.py index 92c267c38f..793b4e43fb 100755 --- a/examples/async_demo.py +++ b/examples/async_demo.py @@ -10,7 +10,7 @@ async def main() -> None: stream = await client.completions.create( - model="text-davinci-003", + model="gpt-3.5-turbo-instruct", prompt="Say this is a test", stream=True, ) diff --git a/examples/audio.py b/examples/audio.py index a5f535dcd6..af41fe601b 100755 --- a/examples/audio.py +++ b/examples/audio.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env rye run python from pathlib import Path @@ -12,14 +12,18 @@ def main() -> None: # Create text-to-speech audio file - response = openai.audio.speech.create( - model="tts-1", voice="alloy", input="the quick brown fox jumped over the lazy dogs" - ) - - response.stream_to_file(speech_file_path) + with openai.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + ) as response: + response.stream_to_file(speech_file_path) # Create transcription from audio file - transcription = openai.audio.transcriptions.create(model="whisper-1", file=speech_file_path) + transcription = openai.audio.transcriptions.create( + model="whisper-1", + file=speech_file_path, + ) print(transcription.text) # Create translation from audio file diff --git a/examples/azure.py b/examples/azure.py index a28b8cc433..6936c4cb0e 100755 --- a/examples/azure.py +++ b/examples/azure.py @@ -20,7 +20,7 @@ }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) deployment_client = AzureOpenAI( @@ -40,4 +40,4 @@ }, ], ) -print(completion.model_dump_json(indent=2)) +print(completion.to_json()) diff --git a/examples/azure_ad.py b/examples/azure_ad.py index f13079dd04..67e2f23713 100755 --- a/examples/azure_ad.py +++ b/examples/azure_ad.py @@ -1,30 +1,67 @@ -from azure.identity import DefaultAzureCredential, get_bearer_token_provider +import asyncio -from openai import AzureOpenAI +from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI, AzureADTokenProvider, AsyncAzureADTokenProvider -token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") +scopes = "https://cognitiveservices.azure.com/.default" - -# may change in the future +# May change in the future # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning api_version = "2023-07-01-preview" # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource endpoint = "https://my-resource.openai.azure.com/" -client = 
AzureOpenAI( - api_version=api_version, - azure_endpoint=endpoint, - azure_ad_token_provider=token_provider, -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.model_dump_json(indent=2)) +deployment_name = "deployment-name" # e.g. gpt-35-instant + + +def sync_main() -> None: + from azure.identity import DefaultAzureCredential, get_bearer_token_provider + + token_provider: AzureADTokenProvider = get_bearer_token_provider(DefaultAzureCredential(), scopes) + + client = AzureOpenAI( + api_version=api_version, + azure_endpoint=endpoint, + azure_ad_token_provider=token_provider, + ) + + completion = client.chat.completions.create( + model=deployment_name, + messages=[ + { + "role": "user", + "content": "How do I output all files in a directory using Python?", + } + ], + ) + + print(completion.to_json()) + + +async def async_main() -> None: + from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider + + token_provider: AsyncAzureADTokenProvider = get_bearer_token_provider(DefaultAzureCredential(), scopes) + + client = AsyncAzureOpenAI( + api_version=api_version, + azure_endpoint=endpoint, + azure_ad_token_provider=token_provider, + ) + + completion = await client.chat.completions.create( + model=deployment_name, + messages=[ + { + "role": "user", + "content": "How do I output all files in a directory using Python?", + } + ], + ) + + print(completion.to_json()) + + +sync_main() + +asyncio.run(async_main()) diff --git a/examples/demo.py b/examples/demo.py index 37830e3e97..ac1710f3e0 100755 --- a/examples/demo.py +++ b/examples/demo.py @@ -36,3 +36,18 @@ print(chunk.choices[0].delta.content, end="") print() + +# Response headers: +print("----- custom response headers test -----") +response = client.chat.completions.with_raw_response.create( + model="gpt-4", + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], +) +completion = response.parse() +print(response.request_id) +print(completion.choices[0].message.content) diff --git a/examples/generate_file.sh b/examples/generate_file.sh new file mode 100644 index 0000000000..ff07d096be --- /dev/null +++ b/examples/generate_file.sh @@ -0,0 +1,10 @@ +# generate a text file with random data for testing file uploads +wanted_size=$((1024*2048*512)) +file_size=$(( ((wanted_size/12)+1)*12 )) +read_size=$((file_size*3/4)) + +echo "wanted=$wanted_size file=$file_size read=$read_size" + +dd if=/dev/urandom bs=$read_size count=1 | base64 > /tmp/small_test_file.txt + +truncate -s "$wanted_size" /tmp/big_test_file.txt diff --git a/examples/parsing.py b/examples/parsing.py new file mode 100644 index 0000000000..17e5db52ec --- /dev/null +++ b/examples/parsing.py @@ -0,0 +1,36 @@ +from typing import List + +import rich +from pydantic import BaseModel + +from openai import OpenAI + + +class Step(BaseModel): + explanation: str + output: str + + +class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + +client = OpenAI() + +completion = client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "You are a helpful math tutor."}, + {"role": "user", "content": "solve 8x + 31 = 2"}, + ], + response_format=MathResponse, +) + +message = completion.choices[0].message +if message.parsed: + rich.print(message.parsed.steps) + + print("answer: ", message.parsed.final_answer) +else: + 
print(message.refusal) diff --git a/examples/parsing_stream.py b/examples/parsing_stream.py new file mode 100644 index 0000000000..6c6f078f77 --- /dev/null +++ b/examples/parsing_stream.py @@ -0,0 +1,42 @@ +from typing import List + +import rich +from pydantic import BaseModel + +from openai import OpenAI + + +class Step(BaseModel): + explanation: str + output: str + + +class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + +client = OpenAI() + +with client.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "You are a helpful math tutor."}, + {"role": "user", "content": "solve 8x + 31 = 2"}, + ], + response_format=MathResponse, +) as stream: + for event in stream: + if event.type == "content.delta": + print(event.delta, end="", flush=True) + elif event.type == "content.done": + print("\n") + if event.parsed is not None: + print(f"answer: {event.parsed.final_answer}") + elif event.type == "refusal.delta": + print(event.delta, end="", flush=True) + elif event.type == "refusal.done": + print() + +print("---------------") +rich.print(stream.get_final_completion()) diff --git a/examples/parsing_tools.py b/examples/parsing_tools.py new file mode 100644 index 0000000000..c6065eeb7a --- /dev/null +++ b/examples/parsing_tools.py @@ -0,0 +1,80 @@ +from enum import Enum +from typing import List, Union + +import rich +from pydantic import BaseModel + +import openai +from openai import OpenAI + + +class Table(str, Enum): + orders = "orders" + customers = "customers" + products = "products" + + +class Column(str, Enum): + id = "id" + status = "status" + expected_delivery_date = "expected_delivery_date" + delivered_at = "delivered_at" + shipped_at = "shipped_at" + ordered_at = "ordered_at" + canceled_at = "canceled_at" + + +class Operator(str, Enum): + eq = "=" + gt = ">" + lt = "<" + le = "<=" + ge = ">=" + ne = "!=" + + +class OrderBy(str, Enum): + asc = "asc" + desc = "desc" + + +class DynamicValue(BaseModel): + column_name: str + + +class Condition(BaseModel): + column: str + operator: Operator + value: Union[str, int, DynamicValue] + + +class Query(BaseModel): + table_name: Table + columns: List[Column] + conditions: List[Condition] + order_by: OrderBy + + +client = OpenAI() + +completion = client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "system", + "content": "You are a helpful assistant. The current date is August 6, 2024. 
You help users query for the data they are looking for by calling the query function.", + }, + { + "role": "user", + "content": "look up all my orders in november of last year that were fulfilled but not delivered on time", + }, + ], + tools=[ + openai.pydantic_function_tool(Query), + ], +) + +tool_call = (completion.choices[0].message.tool_calls or [])[0] +rich.print(tool_call.function) +assert isinstance(tool_call.function.parsed_arguments, Query) +print(tool_call.function.parsed_arguments.table_name) diff --git a/examples/parsing_tools_stream.py b/examples/parsing_tools_stream.py new file mode 100644 index 0000000000..eea6f6a43a --- /dev/null +++ b/examples/parsing_tools_stream.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import rich +from pydantic import BaseModel + +import openai +from openai import OpenAI + + +class GetWeather(BaseModel): + city: str + country: str + + +client = OpenAI() + + +with client.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF and New York?", + }, + ], + tools=[ + # because we're using `.parse_stream()`, the returned tool calls + # will be automatically deserialized into this `GetWeather` type + openai.pydantic_function_tool(GetWeather, name="get_weather"), + ], + parallel_tool_calls=True, +) as stream: + for event in stream: + if event.type == "tool_calls.function.arguments.delta" or event.type == "tool_calls.function.arguments.done": + rich.get_console().print(event, width=80) + +print("----\n") +rich.print(stream.get_final_completion()) diff --git a/examples/picture.py b/examples/picture.py new file mode 100644 index 0000000000..c27b52b0da --- /dev/null +++ b/examples/picture.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +from openai import OpenAI + +# gets OPENAI_API_KEY from your environment variables +openai = OpenAI() + +prompt = "An astronaut lounging in a tropical resort in space, pixel art" +model = "dall-e-3" + + +def main() -> None: + # Generate an image based on the prompt + response = openai.images.generate(prompt=prompt, model=model) + + # Prints response containing a URL link to image + print(response) + + +if __name__ == "__main__": + main() diff --git a/examples/realtime/audio_util.py b/examples/realtime/audio_util.py new file mode 100644 index 0000000000..b073cc45be --- /dev/null +++ b/examples/realtime/audio_util.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import io +import base64 +import asyncio +import threading +from typing import Callable, Awaitable + +import numpy as np +import pyaudio +import sounddevice as sd +from pydub import AudioSegment + +from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection + +CHUNK_LENGTH_S = 0.05 # 100ms +SAMPLE_RATE = 24000 +FORMAT = pyaudio.paInt16 +CHANNELS = 1 + +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false + + +def audio_to_pcm16_base64(audio_bytes: bytes) -> bytes: + # load the audio file from the byte stream + audio = AudioSegment.from_file(io.BytesIO(audio_bytes)) + print(f"Loaded audio: {audio.frame_rate=} {audio.channels=} {audio.sample_width=} {audio.frame_width=}") + # resample to 24kHz mono pcm16 + pcm_audio = audio.set_frame_rate(SAMPLE_RATE).set_channels(CHANNELS).set_sample_width(2).raw_data + return pcm_audio + + +class AudioPlayerAsync: + def __init__(self): + self.queue = [] + self.lock = threading.Lock() + self.stream = sd.OutputStream( + callback=self.callback, + samplerate=SAMPLE_RATE, + 
channels=CHANNELS, + dtype=np.int16, + blocksize=int(CHUNK_LENGTH_S * SAMPLE_RATE), + ) + self.playing = False + self._frame_count = 0 + + def callback(self, outdata, frames, time, status): # noqa + with self.lock: + data = np.empty(0, dtype=np.int16) + + # get next item from queue if there is still space in the buffer + while len(data) < frames and len(self.queue) > 0: + item = self.queue.pop(0) + frames_needed = frames - len(data) + data = np.concatenate((data, item[:frames_needed])) + if len(item) > frames_needed: + self.queue.insert(0, item[frames_needed:]) + + self._frame_count += len(data) + + # fill the rest of the frames with zeros if there is no more data + if len(data) < frames: + data = np.concatenate((data, np.zeros(frames - len(data), dtype=np.int16))) + + outdata[:] = data.reshape(-1, 1) + + def reset_frame_count(self): + self._frame_count = 0 + + def get_frame_count(self): + return self._frame_count + + def add_data(self, data: bytes): + with self.lock: + # bytes is pcm16 single channel audio data, convert to numpy array + np_data = np.frombuffer(data, dtype=np.int16) + self.queue.append(np_data) + if not self.playing: + self.start() + + def start(self): + self.playing = True + self.stream.start() + + def stop(self): + self.playing = False + self.stream.stop() + with self.lock: + self.queue = [] + + def terminate(self): + self.stream.close() + + +async def send_audio_worker_sounddevice( + connection: AsyncRealtimeConnection, + should_send: Callable[[], bool] | None = None, + start_send: Callable[[], Awaitable[None]] | None = None, +): + sent_audio = False + + device_info = sd.query_devices() + print(device_info) + + read_size = int(SAMPLE_RATE * 0.02) + + stream = sd.InputStream( + channels=CHANNELS, + samplerate=SAMPLE_RATE, + dtype="int16", + ) + stream.start() + + try: + while True: + if stream.read_available < read_size: + await asyncio.sleep(0) + continue + + data, _ = stream.read(read_size) + + if should_send() if should_send else True: + if not sent_audio and start_send: + await start_send() + await connection.send( + {"type": "input_audio_buffer.append", "audio": base64.b64encode(data).decode("utf-8")} + ) + sent_audio = True + + elif sent_audio: + print("Done, triggering inference") + await connection.send({"type": "input_audio_buffer.commit"}) + await connection.send({"type": "response.create", "response": {}}) + sent_audio = False + + await asyncio.sleep(0) + + except KeyboardInterrupt: + pass + finally: + stream.stop() + stream.close() diff --git a/examples/realtime/azure_realtime.py b/examples/realtime/azure_realtime.py new file mode 100644 index 0000000000..de88d47052 --- /dev/null +++ b/examples/realtime/azure_realtime.py @@ -0,0 +1,57 @@ +import os +import asyncio + +from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider + +from openai import AsyncAzureOpenAI + +# Azure OpenAI Realtime Docs + +# How-to: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio +# Supported models and API versions: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio#supported-models +# Entra ID auth: https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity + + +async def main() -> None: + """The following example demonstrates how to configure Azure OpenAI to use the Realtime API. + For an audio example, see push_to_talk_app.py and update the client and model parameter accordingly. + + When prompted for user input, type a message and hit enter to send it to the model. 
+ Enter "q" to quit the conversation. + """ + + credential = DefaultAzureCredential() + client = AsyncAzureOpenAI( + azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], + azure_ad_token_provider=get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default"), + api_version="2024-10-01-preview", + ) + async with client.beta.realtime.connect( + model="gpt-4o-realtime-preview", # deployment name for your model + ) as connection: + await connection.session.update(session={"modalities": ["text"]}) # type: ignore + while True: + user_input = input("Enter a message: ") + if user_input == "q": + break + + await connection.conversation.item.create( + item={ + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": user_input}], + } + ) + await connection.response.create() + async for event in connection: + if event.type == "response.text.delta": + print(event.delta, flush=True, end="") + elif event.type == "response.text.done": + print() + elif event.type == "response.done": + break + + await credential.close() + + +asyncio.run(main()) diff --git a/examples/realtime/push_to_talk_app.py b/examples/realtime/push_to_talk_app.py new file mode 100755 index 0000000000..8dc303a83a --- /dev/null +++ b/examples/realtime/push_to_talk_app.py @@ -0,0 +1,281 @@ +#!/usr/bin/env uv run +#################################################################### +# Sample TUI app with a push to talk interface to the Realtime API # +# If you have `uv` installed and the `OPENAI_API_KEY` # +# environment variable set, you can run this example with just # +# # +# `./examples/realtime/push_to_talk_app.py` # +#################################################################### +# +# /// script +# requires-python = ">=3.9" +# dependencies = [ +# "textual", +# "numpy", +# "pyaudio", +# "pydub", +# "sounddevice", +# "openai[realtime]", +# ] +# +# [tool.uv.sources] +# openai = { path = "../../", editable = true } +# /// +from __future__ import annotations + +import base64 +import asyncio +from typing import Any, cast +from typing_extensions import override + +from textual import events +from audio_util import CHANNELS, SAMPLE_RATE, AudioPlayerAsync +from textual.app import App, ComposeResult +from textual.widgets import Button, Static, RichLog +from textual.reactive import reactive +from textual.containers import Container + +from openai import AsyncOpenAI +from openai.types.beta.realtime.session import Session +from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection + + +class SessionDisplay(Static): + """A widget that shows the current session ID.""" + + session_id = reactive("") + + @override + def render(self) -> str: + return f"Session ID: {self.session_id}" if self.session_id else "Connecting..." + + +class AudioStatusIndicator(Static): + """A widget that shows the current audio recording status.""" + + is_recording = reactive(False) + + @override + def render(self) -> str: + status = ( + "🔴 Recording... 
(Press K to stop)" if self.is_recording else "⚪ Press K to start recording (Q to quit)" + ) + return status + + +class RealtimeApp(App[None]): + CSS = """ + Screen { + background: #1a1b26; /* Dark blue-grey background */ + } + + Container { + border: double rgb(91, 164, 91); + } + + Horizontal { + width: 100%; + } + + #input-container { + height: 5; /* Explicit height for input container */ + margin: 1 1; + padding: 1 2; + } + + Input { + width: 80%; + height: 3; /* Explicit height for input */ + } + + Button { + width: 20%; + height: 3; /* Explicit height for button */ + } + + #bottom-pane { + width: 100%; + height: 82%; /* Reduced to make room for session display */ + border: round rgb(205, 133, 63); + content-align: center middle; + } + + #status-indicator { + height: 3; + content-align: center middle; + background: #2a2b36; + border: solid rgb(91, 164, 91); + margin: 1 1; + } + + #session-display { + height: 3; + content-align: center middle; + background: #2a2b36; + border: solid rgb(91, 164, 91); + margin: 1 1; + } + + Static { + color: white; + } + """ + + client: AsyncOpenAI + should_send_audio: asyncio.Event + audio_player: AudioPlayerAsync + last_audio_item_id: str | None + connection: AsyncRealtimeConnection | None + session: Session | None + connected: asyncio.Event + + def __init__(self) -> None: + super().__init__() + self.connection = None + self.session = None + self.client = AsyncOpenAI() + self.audio_player = AudioPlayerAsync() + self.last_audio_item_id = None + self.should_send_audio = asyncio.Event() + self.connected = asyncio.Event() + + @override + def compose(self) -> ComposeResult: + """Create child widgets for the app.""" + with Container(): + yield SessionDisplay(id="session-display") + yield AudioStatusIndicator(id="status-indicator") + yield RichLog(id="bottom-pane", wrap=True, highlight=True, markup=True) + + async def on_mount(self) -> None: + self.run_worker(self.handle_realtime_connection()) + self.run_worker(self.send_mic_audio()) + + async def handle_realtime_connection(self) -> None: + async with self.client.beta.realtime.connect(model="gpt-4o-realtime-preview") as conn: + self.connection = conn + self.connected.set() + + # note: this is the default and can be omitted + # if you want to manually handle VAD yourself, then set `'turn_detection': None` + await conn.session.update(session={"turn_detection": {"type": "server_vad"}}) + + acc_items: dict[str, Any] = {} + + async for event in conn: + if event.type == "session.created": + self.session = event.session + session_display = self.query_one(SessionDisplay) + assert event.session.id is not None + session_display.session_id = event.session.id + continue + + if event.type == "session.updated": + self.session = event.session + continue + + if event.type == "response.audio.delta": + if event.item_id != self.last_audio_item_id: + self.audio_player.reset_frame_count() + self.last_audio_item_id = event.item_id + + bytes_data = base64.b64decode(event.delta) + self.audio_player.add_data(bytes_data) + continue + + if event.type == "response.audio_transcript.delta": + try: + text = acc_items[event.item_id] + except KeyError: + acc_items[event.item_id] = event.delta + else: + acc_items[event.item_id] = text + event.delta + + # Clear and update the entire content because RichLog otherwise treats each delta as a new line + bottom_pane = self.query_one("#bottom-pane", RichLog) + bottom_pane.clear() + bottom_pane.write(acc_items[event.item_id]) + continue + + async def _get_connection(self) -> AsyncRealtimeConnection: + 
await self.connected.wait() + assert self.connection is not None + return self.connection + + async def send_mic_audio(self) -> None: + import sounddevice as sd # type: ignore + + sent_audio = False + + device_info = sd.query_devices() + print(device_info) + + read_size = int(SAMPLE_RATE * 0.02) + + stream = sd.InputStream( + channels=CHANNELS, + samplerate=SAMPLE_RATE, + dtype="int16", + ) + stream.start() + + status_indicator = self.query_one(AudioStatusIndicator) + + try: + while True: + if stream.read_available < read_size: + await asyncio.sleep(0) + continue + + await self.should_send_audio.wait() + status_indicator.is_recording = True + + data, _ = stream.read(read_size) + + connection = await self._get_connection() + if not sent_audio: + asyncio.create_task(connection.send({"type": "response.cancel"})) + sent_audio = True + + await connection.input_audio_buffer.append(audio=base64.b64encode(cast(Any, data)).decode("utf-8")) + + await asyncio.sleep(0) + except KeyboardInterrupt: + pass + finally: + stream.stop() + stream.close() + + async def on_key(self, event: events.Key) -> None: + """Handle key press events.""" + if event.key == "enter": + self.query_one(Button).press() + return + + if event.key == "q": + self.exit() + return + + if event.key == "k": + status_indicator = self.query_one(AudioStatusIndicator) + if status_indicator.is_recording: + self.should_send_audio.clear() + status_indicator.is_recording = False + + if self.session and self.session.turn_detection is None: + # The default in the API is that the model will automatically detect when the user has + # stopped talking and then start responding itself. + # + # However if we're in manual `turn_detection` mode then we need to + # manually tell the model to commit the audio buffer and start responding. 
+ conn = await self._get_connection() + await conn.input_audio_buffer.commit() + await conn.response.create() + else: + self.should_send_audio.set() + status_indicator.is_recording = True + + +if __name__ == "__main__": + app = RealtimeApp() + app.run() diff --git a/examples/responses/__init__.py b/examples/responses/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/responses/streaming.py b/examples/responses/streaming.py new file mode 100644 index 0000000000..39787968d6 --- /dev/null +++ b/examples/responses/streaming.py @@ -0,0 +1,30 @@ +from typing import List + +import rich +from pydantic import BaseModel + +from openai import OpenAI + + +class Step(BaseModel): + explanation: str + output: str + + +class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + +client = OpenAI() + +with client.responses.stream( + input="solve 8x + 31 = 2", + model="gpt-4o-2024-08-06", + text_format=MathResponse, +) as stream: + for event in stream: + if "output_text" in event.type: + rich.print(event) + +rich.print(stream.get_final_response()) diff --git a/examples/responses/streaming_tools.py b/examples/responses/streaming_tools.py new file mode 100644 index 0000000000..f40cd9356d --- /dev/null +++ b/examples/responses/streaming_tools.py @@ -0,0 +1,68 @@ +from enum import Enum +from typing import List, Union + +import rich +from pydantic import BaseModel + +import openai +from openai import OpenAI + + +class Table(str, Enum): + orders = "orders" + customers = "customers" + products = "products" + + +class Column(str, Enum): + id = "id" + status = "status" + expected_delivery_date = "expected_delivery_date" + delivered_at = "delivered_at" + shipped_at = "shipped_at" + ordered_at = "ordered_at" + canceled_at = "canceled_at" + + +class Operator(str, Enum): + eq = "=" + gt = ">" + lt = "<" + le = "<=" + ge = ">=" + ne = "!=" + + +class OrderBy(str, Enum): + asc = "asc" + desc = "desc" + + +class DynamicValue(BaseModel): + column_name: str + + +class Condition(BaseModel): + column: str + operator: Operator + value: Union[str, int, DynamicValue] + + +class Query(BaseModel): + table_name: Table + columns: List[Column] + conditions: List[Condition] + order_by: OrderBy + + +client = OpenAI() + +with client.responses.stream( + model="gpt-4o-2024-08-06", + input="look up all my orders in november of last year that were fulfilled but not delivered on time", + tools=[ + openai.pydantic_function_tool(Query), + ], +) as stream: + for event in stream: + rich.print(event) diff --git a/examples/responses/structured_outputs.py b/examples/responses/structured_outputs.py new file mode 100644 index 0000000000..0b146bc0bc --- /dev/null +++ b/examples/responses/structured_outputs.py @@ -0,0 +1,55 @@ +from typing import List + +import rich +from pydantic import BaseModel + +from openai import OpenAI + + +class Step(BaseModel): + explanation: str + output: str + + +class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + +client = OpenAI() + +rsp = client.responses.parse( + input="solve 8x + 31 = 2", + model="gpt-4o-2024-08-06", + text_format=MathResponse, +) + +for output in rsp.output: + if output.type != "message": + raise Exception("Unexpected non message") + + for item in output.content: + if item.type != "output_text": + raise Exception("unexpected output type") + + if not item.parsed: + raise Exception("Could not parse response") + + rich.print(item.parsed) + + print("answer: ", item.parsed.final_answer) + +# or + +message = rsp.output[0] +assert 
message.type == "message" + +text = message.content[0] +assert text.type == "output_text" + +if not text.parsed: + raise Exception("Could not parse response") + +rich.print(text.parsed) + +print("answer: ", text.parsed.final_answer) diff --git a/examples/responses/structured_outputs_tools.py b/examples/responses/structured_outputs_tools.py new file mode 100644 index 0000000000..918348207d --- /dev/null +++ b/examples/responses/structured_outputs_tools.py @@ -0,0 +1,73 @@ +from enum import Enum +from typing import List, Union + +import rich +from pydantic import BaseModel + +import openai +from openai import OpenAI + + +class Table(str, Enum): + orders = "orders" + customers = "customers" + products = "products" + + +class Column(str, Enum): + id = "id" + status = "status" + expected_delivery_date = "expected_delivery_date" + delivered_at = "delivered_at" + shipped_at = "shipped_at" + ordered_at = "ordered_at" + canceled_at = "canceled_at" + + +class Operator(str, Enum): + eq = "=" + gt = ">" + lt = "<" + le = "<=" + ge = ">=" + ne = "!=" + + +class OrderBy(str, Enum): + asc = "asc" + desc = "desc" + + +class DynamicValue(BaseModel): + column_name: str + + +class Condition(BaseModel): + column: str + operator: Operator + value: Union[str, int, DynamicValue] + + +class Query(BaseModel): + table_name: Table + columns: List[Column] + conditions: List[Condition] + order_by: OrderBy + + +client = OpenAI() + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input="look up all my orders in november of last year that were fulfilled but not delivered on time", + tools=[ + openai.pydantic_function_tool(Query), + ], +) + +rich.print(response) + +function_call = response.output[0] +assert function_call.type == "function_call" +assert isinstance(function_call.parsed_arguments, Query) +print("table name:", function_call.parsed_arguments.table_name) diff --git a/examples/speech_to_text.py b/examples/speech_to_text.py new file mode 100755 index 0000000000..cc3f56b424 --- /dev/null +++ b/examples/speech_to_text.py @@ -0,0 +1,25 @@ +#!/usr/bin/env rye run python + +import asyncio + +from openai import AsyncOpenAI +from openai.helpers import Microphone + +# gets OPENAI_API_KEY from your environment variables +openai = AsyncOpenAI() + + +async def main() -> None: + print("Recording for the next 10 seconds...") + recording = await Microphone(timeout=10).record() + print("Recording complete") + transcription = await openai.audio.transcriptions.create( + model="whisper-1", + file=recording, + ) + + print(transcription.text) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/streaming.py b/examples/streaming.py index 168877dfc5..9a84891a83 100755 --- a/examples/streaming.py +++ b/examples/streaming.py @@ -13,7 +13,7 @@ def sync_main() -> None: client = OpenAI() response = client.completions.create( - model="text-davinci-002", + model="gpt-3.5-turbo-instruct", prompt="1,2,3,", max_tokens=5, temperature=0, @@ -22,18 +22,18 @@ def sync_main() -> None: # You can manually control iteration over the response first = next(response) - print(f"got response data: {first.model_dump_json(indent=2)}") + print(f"got response data: {first.to_json()}") # Or you could automatically iterate through all of data. # Note that the for loop will not exit until *all* of the data has been processed. 
for data in response: - print(data.model_dump_json()) + print(data.to_json()) async def async_main() -> None: client = AsyncOpenAI() response = await client.completions.create( - model="text-davinci-002", + model="gpt-3.5-turbo-instruct", prompt="1,2,3,", max_tokens=5, temperature=0, @@ -43,12 +43,12 @@ async def async_main() -> None: # You can manually control iteration over the response. # In Python 3.10+ you can also use the `await anext(response)` builtin instead first = await response.__anext__() - print(f"got response data: {first.model_dump_json(indent=2)}") + print(f"got response data: {first.to_json()}") # Or you could automatically iterate through all of data. # Note that the for loop will not exit until *all* of the data has been processed. async for data in response: - print(data.model_dump_json()) + print(data.to_json()) sync_main() diff --git a/examples/text_to_speech.py b/examples/text_to_speech.py new file mode 100755 index 0000000000..ac8b12b0ab --- /dev/null +++ b/examples/text_to_speech.py @@ -0,0 +1,31 @@ +#!/usr/bin/env rye run python + +import time +import asyncio + +from openai import AsyncOpenAI +from openai.helpers import LocalAudioPlayer + +# gets OPENAI_API_KEY from your environment variables +openai = AsyncOpenAI() + + +async def main() -> None: + start_time = time.time() + + async with openai.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + response_format="pcm", # similar to WAV, but without a header chunk at the start. + input="""I see skies of blue and clouds of white + The bright blessed days, the dark sacred nights + And I think to myself + What a wonderful world""", + ) as response: + print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms") + await LocalAudioPlayer().play(response) + print(f"Time to play: {int((time.time() - start_time) * 1000)}ms") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/uploads.py b/examples/uploads.py new file mode 100644 index 0000000000..c3896b365b --- /dev/null +++ b/examples/uploads.py @@ -0,0 +1,46 @@ +import sys +from pathlib import Path + +import rich + +from openai import OpenAI + +# generate this file using `./generate_file.sh` +file = Path("/tmp/big_test_file.txt") + +client = OpenAI() + + +def from_disk() -> None: + print("uploading file from disk") + + upload = client.uploads.upload_file_chunked( + file=file, + mime_type="txt", + purpose="batch", + ) + rich.print(upload) + + +def from_in_memory() -> None: + print("uploading file from memory") + + # read the data into memory ourselves to simulate + # it coming from somewhere else + data = file.read_bytes() + filename = "my_file.txt" + + upload = client.uploads.upload_file_chunked( + file=data, + filename=filename, + bytes=len(data), + mime_type="txt", + purpose="batch", + ) + rich.print(upload) + + +if "memory" in sys.argv: + from_in_memory() +else: + from_disk() diff --git a/helpers.md b/helpers.md new file mode 100644 index 0000000000..77823fa750 --- /dev/null +++ b/helpers.md @@ -0,0 +1,516 @@ +# Structured Outputs Parsing Helpers + +The OpenAI API supports extracting JSON from the model with the `response_format` request param, for more details on the API, see [this guide](https://platform.openai.com/docs/guides/structured-outputs). 
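+
+For reference, here is a minimal sketch of what a raw `response_format` request looks like without any helper (the model name, prompt and schema below are illustrative, not prescriptive); the parsing helper described next wraps this pattern for you.
+
+```py
+import json
+
+from openai import OpenAI
+
+client = OpenAI()
+
+# Illustrative strict JSON schema describing the object we want back.
+completion = client.chat.completions.create(
+    model="gpt-4o-2024-08-06",
+    messages=[{"role": "user", "content": "Alice is 31 years old."}],
+    response_format={
+        "type": "json_schema",
+        "json_schema": {
+            "name": "person",
+            "strict": True,
+            "schema": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "age": {"type": "integer"},
+                },
+                "required": ["name", "age"],
+                "additionalProperties": False,
+            },
+        },
+    },
+)
+
+# The content comes back as a JSON string that still has to be decoded by hand.
+print(json.loads(completion.choices[0].message.content or "{}"))
+```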
+
+The SDK provides a `client.beta.chat.completions.parse()` method which is a wrapper over the `client.chat.completions.create()` method that
+provides richer integrations with Python-specific types & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+## Auto-parsing response content with Pydantic models
+
+You can pass a pydantic model to the `.parse()` method and the SDK will automatically convert the model
+into a JSON schema, send it to the API and parse the response content back into the given model.
+
+```py
+from typing import List
+from pydantic import BaseModel
+from openai import OpenAI
+
+class Step(BaseModel):
+    explanation: str
+    output: str
+
+class MathResponse(BaseModel):
+    steps: List[Step]
+    final_answer: str
+
+client = OpenAI()
+completion = client.beta.chat.completions.parse(
+    model="gpt-4o-2024-08-06",
+    messages=[
+        {"role": "system", "content": "You are a helpful math tutor."},
+        {"role": "user", "content": "solve 8x + 31 = 2"},
+    ],
+    response_format=MathResponse,
+)
+
+message = completion.choices[0].message
+if message.parsed:
+    print(message.parsed.steps)
+    print("answer: ", message.parsed.final_answer)
+else:
+    print(message.refusal)
+```
+
+## Auto-parsing function tool calls
+
+The `.parse()` method will also automatically parse `function` tool calls if:
+- You use the `openai.pydantic_function_tool()` helper method
+- You mark your tool schema with `"strict": True`
+
+For example:
+
+```py
+from enum import Enum
+from typing import List, Union
+from pydantic import BaseModel
+import openai
+
+class Table(str, Enum):
+    orders = "orders"
+    customers = "customers"
+    products = "products"
+
+class Column(str, Enum):
+    id = "id"
+    status = "status"
+    expected_delivery_date = "expected_delivery_date"
+    delivered_at = "delivered_at"
+    shipped_at = "shipped_at"
+    ordered_at = "ordered_at"
+    canceled_at = "canceled_at"
+
+class Operator(str, Enum):
+    eq = "="
+    gt = ">"
+    lt = "<"
+    le = "<="
+    ge = ">="
+    ne = "!="
+
+class OrderBy(str, Enum):
+    asc = "asc"
+    desc = "desc"
+
+class DynamicValue(BaseModel):
+    column_name: str
+
+class Condition(BaseModel):
+    column: str
+    operator: Operator
+    value: Union[str, int, DynamicValue]
+
+class Query(BaseModel):
+    table_name: Table
+    columns: List[Column]
+    conditions: List[Condition]
+    order_by: OrderBy
+
+client = openai.OpenAI()
+completion = client.beta.chat.completions.parse(
+    model="gpt-4o-2024-08-06",
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a helpful assistant. The current date is August 6, 2024. You help users query for the data they are looking for by calling the query function.",
+        },
+        {
+            "role": "user",
+            "content": "look up all my orders in may of last year that were fulfilled but not delivered on time",
+        },
+    ],
+    tools=[
+        openai.pydantic_function_tool(Query),
+    ],
+)
+
+tool_call = (completion.choices[0].message.tool_calls or [])[0]
+print(tool_call.function)
+assert isinstance(tool_call.function.parsed_arguments, Query)
+print(tool_call.function.parsed_arguments.table_name)
+```
+
+### Differences from `.create()`
+
+The `beta.chat.completions.parse()` method imposes some additional restrictions on its usage that `chat.completions.create()` does not.
+
+- If the completion completes with `finish_reason` set to `length` or `content_filter`, the `LengthFinishReasonError` / `ContentFilterFinishReasonError` errors will be raised.
+- Only strict function tools can be passed, e.g. 
`{'type': 'function', 'function': {..., 'strict': True}}` + +# Streaming Helpers + +OpenAI supports streaming responses when interacting with the [Chat Completion](#chat-completions-api) & [Assistant](#assistant-streaming-api) APIs. + +## Chat Completions API + +The SDK provides a `.beta.chat.completions.stream()` method that wraps the `.chat.completions.create(stream=True)` stream providing a more granular event API & automatic accumulation of each delta. + +It also supports all aforementioned [parsing helpers](#structured-outputs-parsing-helpers). + +Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response: + +```py +from openai import AsyncOpenAI + +client = AsyncOpenAI() + +async with client.beta.chat.completions.stream( + model='gpt-4o-2024-08-06', + messages=[...], +) as stream: + async for event in stream: + if event.type == 'content.delta': + print(event.content, flush=True, end='') +``` + +When the context manager is entered, a `ChatCompletionStream` / `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)` is an iterator in the sync client and an async iterator in the async client. The full list of events that are yielded by the iterator are outlined [below](#chat-completions-events). + +When the context manager exits, the response will be closed, however the `stream` instance is still available outside +the context manager. + +### Chat Completions Events + +These events allow you to track the progress of the chat completion generation, access partial results, and handle different aspects of the stream separately. + +Below is a list of the different event types you may encounter: + +#### ChunkEvent + +Emitted for every chunk received from the API. + +- `type`: `"chunk"` +- `chunk`: The raw `ChatCompletionChunk` object received from the API +- `snapshot`: The current accumulated state of the chat completion + +#### ContentDeltaEvent + +Emitted for every chunk containing new content. + +- `type`: `"content.delta"` +- `delta`: The new content string received in this chunk +- `snapshot`: The accumulated content so far +- `parsed`: The partially parsed content (if applicable) + +#### ContentDoneEvent + +Emitted when the content generation is complete. May be fired multiple times if there are multiple choices. + +- `type`: `"content.done"` +- `content`: The full generated content +- `parsed`: The fully parsed content (if applicable) + +#### RefusalDeltaEvent + +Emitted when a chunk contains part of a content refusal. + +- `type`: `"refusal.delta"` +- `delta`: The new refusal content string received in this chunk +- `snapshot`: The accumulated refusal content string so far + +#### RefusalDoneEvent + +Emitted when the refusal content is complete. + +- `type`: `"refusal.done"` +- `refusal`: The full refusal content + +#### FunctionToolCallArgumentsDeltaEvent + +Emitted when a chunk contains part of a function tool call's arguments. + +- `type`: `"tool_calls.function.arguments.delta"` +- `name`: The name of the function being called +- `index`: The index of the tool call +- `arguments`: The accumulated raw JSON string of arguments +- `parsed_arguments`: The partially parsed arguments object +- `arguments_delta`: The new JSON string fragment received in this chunk + +#### FunctionToolCallArgumentsDoneEvent + +Emitted when a function tool call's arguments are complete. 
+
+- `type`: `"tool_calls.function.arguments.done"`
+- `name`: The name of the function being called
+- `index`: The index of the tool call
+- `arguments`: The full raw JSON string of arguments
+- `parsed_arguments`: The fully parsed arguments object. If you used `openai.pydantic_function_tool()`, this will be an instance of the given model.
+
+#### LogprobsContentDeltaEvent
+
+Emitted when a chunk contains new content [log probabilities](https://cookbook.openai.com/examples/using_logprobs).
+
+- `type`: `"logprobs.content.delta"`
+- `content`: A list of the new log probabilities received in this chunk
+- `snapshot`: A list of the accumulated log probabilities so far
+
+#### LogprobsContentDoneEvent
+
+Emitted when all content [log probabilities](https://cookbook.openai.com/examples/using_logprobs) have been received.
+
+- `type`: `"logprobs.content.done"`
+- `content`: The full list of token log probabilities for the content
+
+#### LogprobsRefusalDeltaEvent
+
+Emitted when a chunk contains new refusal [log probabilities](https://cookbook.openai.com/examples/using_logprobs).
+
+- `type`: `"logprobs.refusal.delta"`
+- `refusal`: A list of the new log probabilities received in this chunk
+- `snapshot`: A list of the accumulated log probabilities so far
+
+#### LogprobsRefusalDoneEvent
+
+Emitted when all refusal [log probabilities](https://cookbook.openai.com/examples/using_logprobs) have been received.
+
+- `type`: `"logprobs.refusal.done"`
+- `refusal`: The full list of token log probabilities for the refusal
+
+### Chat Completions stream methods
+
+A handful of helper methods are provided on the stream class for additional convenience:
+
+**`.get_final_completion()`**
+
+Returns the accumulated `ParsedChatCompletion` object.
+
+```py
+async with client.beta.chat.completions.stream(...) as stream:
+    ...
+
+completion = await stream.get_final_completion()
+print(completion.choices[0].message)
+```
+
+**`.until_done()`**
+
+If you want to wait for the stream to complete, you can use the `.until_done()` method.
+
+```py
+async with client.beta.chat.completions.stream(...) as stream:
+    await stream.until_done()
+    # stream is now finished
+```
+
+## Assistant Streaming API
+
+OpenAI supports streaming responses from Assistants. The SDK provides convenience wrappers around the API
+so you can subscribe to the types of events you are interested in as well as receive accumulated responses.
+
+More information can be found in the documentation: [Assistant Streaming](https://platform.openai.com/docs/assistants/overview?lang=python)
+
+#### An example of creating a run and subscribing to some events
+
+You can subscribe to events by creating an event handler class and overriding the relevant event handlers.
+
+```python
+from typing_extensions import override
+from openai import AssistantEventHandler, OpenAI
+from openai.types.beta.threads import Text, TextDelta
+from openai.types.beta.threads.runs import ToolCall, ToolCallDelta
+
+client = OpenAI()
+
+# First, we create an EventHandler class to define
+# how we want to handle the events in the response stream. 
+
+class EventHandler(AssistantEventHandler):
+    @override
+    def on_text_created(self, text: Text) -> None:
+        print(f"\nassistant > ", end="", flush=True)
+
+    @override
+    def on_text_delta(self, delta: TextDelta, snapshot: Text):
+        print(delta.value, end="", flush=True)
+
+    @override
+    def on_tool_call_created(self, tool_call: ToolCall):
+        print(f"\nassistant > {tool_call.type}\n", flush=True)
+
+    @override
+    def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall):
+        if delta.type == "code_interpreter" and delta.code_interpreter:
+            if delta.code_interpreter.input:
+                print(delta.code_interpreter.input, end="", flush=True)
+            if delta.code_interpreter.outputs:
+                print(f"\n\noutput >", flush=True)
+                for output in delta.code_interpreter.outputs:
+                    if output.type == "logs":
+                        print(f"\n{output.logs}", flush=True)
+
+# Then, we use the `stream` SDK helper
+# with the `EventHandler` class to create the Run
+# and stream the response.
+
+with client.beta.threads.runs.stream(
+    thread_id="thread_id",
+    assistant_id="assistant_id",
+    event_handler=EventHandler(),
+) as stream:
+    stream.until_done()
+```
+
+#### An example of iterating over events
+
+You can also iterate over all the streamed events.
+
+```python
+with client.beta.threads.runs.stream(
+    thread_id=thread.id,
+    assistant_id=assistant.id
+) as stream:
+    for event in stream:
+        # Print the text from text delta events
+        if event.event == "thread.message.delta" and event.data.delta.content:
+            print(event.data.delta.content[0].text)
+```
+
+#### An example of iterating over text
+
+You can also iterate over just the text deltas received.
+
+```python
+with client.beta.threads.runs.stream(
+    thread_id=thread.id,
+    assistant_id=assistant.id
+) as stream:
+    for text in stream.text_deltas:
+        print(text)
+```
+
+### Creating Streams
+
+There are three helper methods for creating streams:
+
+```python
+client.beta.threads.runs.stream()
+```
+
+This method can be used to start and stream the response to an existing run with an associated thread
+that is already populated with messages.
+
+```python
+client.beta.threads.create_and_run_stream()
+```
+
+This method can be used to add a message to a thread, start a run and then stream the response.
+
+```python
+client.beta.threads.runs.submit_tool_outputs_stream()
+```
+
+This method can be used to submit a tool output to a run that is waiting on it and start a stream.
+
+### Assistant Events
+
+The assistant API provides the following events that you can subscribe to.
+
+```python
+def on_event(self, event: AssistantStreamEvent)
+```
+
+This allows you to subscribe to all the possible raw events sent by the OpenAI streaming API.
+In many cases it will be more convenient to subscribe to a more specific set of events for your use case.
+
+More information on the types of events can be found here: [Events](https://platform.openai.com/docs/api-reference/assistants-streaming/events)
+
+```python
+def on_run_step_created(self, run_step: RunStep)
+def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep)
+def on_run_step_done(self, run_step: RunStep)
+```
+
+These events allow you to subscribe to the creation, delta and completion of a RunStep. 
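+
+For example, a minimal handler that only tracks run steps might look like the sketch below. This is illustrative only, not part of the SDK docs; it assumes the `RunStep` and `RunStepDelta` types can be imported from `openai.types.beta.threads.runs`, alongside the other types shown above.
+
+```python
+from typing_extensions import override
+from openai import OpenAI, AssistantEventHandler
+# assumed import path, matching the run-step types used in the handler signatures above
+from openai.types.beta.threads.runs import RunStep, RunStepDelta
+
+client = OpenAI()
+
+class RunStepLogger(AssistantEventHandler):
+    @override
+    def on_run_step_created(self, run_step: RunStep) -> None:
+        # a new run step (e.g. a tool call or message creation step) has started
+        print(f"run step created: {run_step.type}")
+
+    @override
+    def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        # `delta` is the incremental update, `snapshot` is the accumulated run step
+        print(f"run step updated: {snapshot.id}")
+
+    @override
+    def on_run_step_done(self, run_step: RunStep) -> None:
+        print(f"run step finished: {run_step.status}")
+
+with client.beta.threads.runs.stream(
+    thread_id="thread_id",
+    assistant_id="assistant_id",
+    event_handler=RunStepLogger(),
+) as stream:
+    stream.until_done()
+```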
+
+For more information on how Runs and RunSteps work, see the documentation: [Runs and RunSteps](https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps)
+
+```python
+def on_message_created(self, message: Message)
+def on_message_delta(self, delta: MessageDelta, snapshot: Message)
+def on_message_done(self, message: Message)
+```
+
+This allows you to subscribe to Message creation, delta and completion events. Messages can contain
+different types of content that can be sent from a model (and events are available for specific content types).
+For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content.
+
+More information on messages can be found
+in the documentation page [Message](https://platform.openai.com/docs/api-reference/messages/object).
+
+```python
+def on_text_created(self, text: Text)
+def on_text_delta(self, delta: TextDelta, snapshot: Text)
+def on_text_done(self, text: Text)
+```
+
+These events allow you to subscribe to the creation, delta and completion of Text content (a specific type of message content).
+For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content.
+
+```python
+def on_image_file_done(self, image_file: ImageFile)
+```
+
+Image files are not sent incrementally, so an event is provided for when an image file is available.
+
+```python
+def on_tool_call_created(self, tool_call: ToolCall)
+def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall)
+def on_tool_call_done(self, tool_call: ToolCall)
+```
+
+These events allow you to subscribe to the creation, delta and completion of a ToolCall.
+
+More information on tools can be found here: [Tools](https://platform.openai.com/docs/assistants/tools)
+
+```python
+def on_end(self)
+```
+
+The last event sent when a stream ends.
+
+```python
+def on_timeout(self)
+```
+
+This event is triggered if the request times out.
+
+```python
+def on_exception(self, exception: Exception)
+```
+
+This event is triggered if an exception occurs during streaming.
+
+### Assistant Methods
+
+The assistant streaming object also provides a few methods for convenience:
+
+```python
+def current_event() -> AssistantStreamEvent | None
+def current_run() -> Run | None
+def current_message_snapshot() -> Message | None
+def current_run_step_snapshot() -> RunStep | None
+```
+
+These methods are provided to allow you to access additional context from within event handlers. In many cases
+the handlers should include all the information you need for processing, but if additional context is required it
+can be accessed.
+
+Note: there is not always a relevant context in certain situations, in which case these methods will return `None`.
+
+```python
+def get_final_run(self) -> Run
+def get_final_run_steps(self) -> List[RunStep]
+def get_final_messages(self) -> List[Message]
+```
+
+These methods are provided for convenience to collect information at the end of a stream. Calling these methods
+will trigger consumption of the stream until completion and then return the relevant accumulated objects.
+
+# Polling Helpers
+
+When interacting with the API, some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete.
+The SDK includes helper functions which will poll the status until it reaches a terminal state and then return the resulting object. 
+If an API method results in an action which could benefit from polling there will be a corresponding version of the +method ending in `_and_poll`. + +All methods also allow you to set the polling frequency, how often the API is checked for an update, via a function argument (`poll_interval_ms`). + +The polling methods are: + +```python +client.beta.threads.create_and_run_poll(...) +client.beta.threads.runs.create_and_poll(...) +client.beta.threads.runs.submit_tool_outputs_and_poll(...) +client.beta.vector_stores.files.upload_and_poll(...) +client.beta.vector_stores.files.create_and_poll(...) +client.beta.vector_stores.file_batches.create_and_poll(...) +client.beta.vector_stores.file_batches.upload_and_poll(...) +``` diff --git a/mypy.ini b/mypy.ini index a4517a002d..660f1a086e 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,10 +2,16 @@ pretty = True show_error_codes = True -# Exclude _files.py because mypy isn't smart enough to apply +# Exclude _files.py and _logs.py because mypy isn't smart enough to apply # the correct type narrowing and as this is an internal module # it's fine to just use Pyright. -exclude = ^(src/openai/_files\.py|_dev/.*\.py)$ +# +# We also exclude our `tests` as mypy doesn't always infer +# types correctly and Pyright will still catch any type errors. + +# realtime examples use inline `uv` script dependencies +# which means it can't be type checked +exclude = ^(src/openai/_files\.py|_dev/.*\.py|tests/.*|src/openai/_utils/_logs\.py|examples/realtime/audio_util\.py|examples/realtime/push_to_talk_app\.py)$ strict_equality = True implicit_reexport = True @@ -38,7 +44,7 @@ cache_fine_grained = True # ``` # Changing this codegen to make mypy happy would increase complexity # and would not be worth it. -disable_error_code = func-returns-value +disable_error_code = func-returns-value,overload-cannot-match # https://github.com/python/mypy/issues/12162 [mypy.overrides] diff --git a/pyproject.toml b/pyproject.toml index f17def16b6..71c86c38ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [project] name = "openai" -version = "1.3.5" +version = "1.78.1" description = "The official Python library for the openai API" -readme = "README.md" +dynamic = ["readme"] license = "Apache-2.0" authors = [ { name = "OpenAI", email = "support@openai.com" }, @@ -10,16 +10,17 @@ authors = [ dependencies = [ "httpx>=0.23.0, <1", "pydantic>=1.9.0, <3", - "typing-extensions>=4.5, <5", - "anyio>=3.5.0, <4", + "typing-extensions>=4.11, <5", + "anyio>=3.5.0, <5", "distro>=1.7.0, <2", - "tqdm > 4" + "sniffio", + "tqdm > 4", + "jiter>=0.4.0, <1", ] -requires-python = ">= 3.7.1" +requires-python = ">= 3.8" classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -34,9 +35,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] -[project.optional-dependencies] -datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] - [project.urls] Homepage = "https://github.com/openai/openai-python" Repository = "https://github.com/openai/openai-python" @@ -44,37 +42,54 @@ Repository = "https://github.com/openai/openai-python" [project.scripts] openai = "openai.cli:main" +[project.optional-dependencies] +realtime = ["websockets >= 13, < 16"] +datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] +voice_helpers = ["sounddevice>=0.5.1", 
"numpy>=2.0.2"] + [tool.rye] managed = true +# version pins are in requirements-dev.lock dev-dependencies = [ - "pyright==1.1.332", - "mypy==1.6.1", - "black==23.3.0", - "respx==0.19.2", - "pytest==7.1.1", - "pytest-asyncio==0.21.1", - "ruff==0.0.282", - "isort==5.10.1", - "time-machine==2.9.0", - "nox==2023.4.22", + "pyright==1.1.399", + "mypy", + "respx", + "pytest", + "pytest-asyncio", + "ruff", + "time-machine", + "nox", "dirty-equals>=0.6.0", + "importlib-metadata>=6.7.0", + "rich>=13.7.1", + "inline-snapshot >=0.7.0", "azure-identity >=1.14.1", - "types-tqdm > 4" + "types-tqdm > 4", + "types-pyaudio > 0", + "trio >=0.22.2", + "nest_asyncio==1.6.0", ] [tool.rye.scripts] format = { chain = [ - "format:black", + "format:ruff", "format:docs", + "fix:ruff", + # run formatting again to fix any inconsistencies when imports are stripped "format:ruff", - "format:isort", ]} -"format:black" = "black ." -"format:docs" = "python bin/blacken-docs.py README.md api.md" -"format:ruff" = "ruff --fix ." -"format:isort" = "isort ." +"format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" +"format:ruff" = "ruff format" + +"lint" = { chain = [ + "check:ruff", + "typecheck", + "check:importable", +]} +"check:ruff" = "ruff check ." +"fix:ruff" = "ruff check --fix ." -"check:ruff" = "ruff ." +"check:importable" = "python -c 'import openai'" typecheck = { chain = [ "typecheck:pyright", @@ -82,10 +97,10 @@ typecheck = { chain = [ ]} "typecheck:pyright" = "pyright" "typecheck:verify-types" = "pyright --verifytypes openai --ignoreexternal" -"typecheck:mypy" = "mypy --enable-incomplete-feature=Unpack ." +"typecheck:mypy" = "mypy ." [build-system] -requires = ["hatchling"] +requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -96,15 +111,38 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/openai"] -[tool.black] -line-length = 120 -target-version = ["py37"] +[tool.hatch.build.targets.sdist] +# Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) +include = [ + "/*.toml", + "/*.json", + "/*.lock", + "/*.md", + "/mypy.ini", + "/noxfile.py", + "bin/*", + "examples/*", + "src/*", + "tests/*", +] + +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "README.md" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] +# replace relative links with absolute links +pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' +replacement = '[\1](https://github.com/openai/openai-python/tree/main/\g<2>)' [tool.pytest.ini_options] testpaths = ["tests"] addopts = "--tb=short" xfail_strict = true asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "session" filterwarnings = [ "error" ] @@ -114,29 +152,39 @@ filterwarnings = [ # there are a couple of flags that are still disabled by # default in strict mode as they are experimental and niche. 
typeCheckingMode = "strict" -pythonVersion = "3.7" +pythonVersion = "3.8" exclude = [ "_dev", ".venv", ".nox", + + # uses inline `uv` script dependencies + # which means it can't be type checked + "examples/realtime/audio_util.py", + "examples/realtime/push_to_talk_app.py" ] reportImplicitOverride = true +reportOverlappingOverload = false reportImportCycles = false reportPrivateUsage = false -[tool.isort] -profile = "black" -length_sort = true -extra_standard_library = ["typing_extensions"] - [tool.ruff] line-length = 120 -format = "grouped" +output-format = "grouped" target-version = "py37" + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] select = [ + # isort + "I", + # bugbear rules + "B", # remove unused imports "F401", # bare except statements @@ -146,16 +194,33 @@ select = [ # print statements "T201", "T203", + # misuse of typing.TYPE_CHECKING + "TC004", + # import rules + "TID251", +] +ignore = [ + # mutable defaults + "B006", ] unfixable = [ # disable auto fix for print statements "T201", "T203", ] -ignore-init-module-imports = true +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"functools.lru_cache".msg = "This function does not retain type information for the wrapped function's arguments; The `lru_cache` function from `_utils` should be used instead" + +[tool.ruff.lint.isort] +length-sort = true +length-sort-straight = true +combine-as-imports = true +extra-standard-library = ["typing_extensions"] +known-first-party = ["openai", "tests"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "bin/**.py" = ["T201", "T203"] +"scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] "examples/**.py" = ["T201", "T203"] diff --git a/release-please-config.json b/release-please-config.json index 5c66d801f5..745ef5fd54 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -5,6 +5,8 @@ "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", "include-v-in-tag": true, "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, "bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": false, "pull-request-header": "Automated Release PR", diff --git a/requirements-dev.lock b/requirements-dev.lock index 0747babdc5..9875a2b860 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -5,70 +5,190 @@ # pre: false # features: [] # all-features: true +# with-sources: false +# generate-hashes: false +# universal: false -e file:. 
annotated-types==0.6.0 -anyio==3.7.1 + # via pydantic +anyio==4.1.0 + # via httpx + # via openai argcomplete==3.1.2 -attrs==23.1.0 -azure-core==1.29.5 -azure-identity==1.15.0 -black==23.3.0 + # via nox +asttokens==2.4.1 + # via inline-snapshot +attrs==24.2.0 + # via outcome + # via trio +azure-core==1.31.0 + # via azure-identity +azure-identity==1.19.0 +black==24.10.0 + # via inline-snapshot certifi==2023.7.22 + # via httpcore + # via httpx + # via requests cffi==1.16.0 -charset-normalizer==3.3.1 + # via cryptography + # via sounddevice +charset-normalizer==3.3.2 + # via requests click==8.1.7 + # via black + # via inline-snapshot colorlog==6.7.0 -cryptography==41.0.5 + # via nox +cryptography==42.0.7 + # via azure-identity + # via msal + # via pyjwt dirty-equals==0.6.0 distlib==0.3.7 + # via virtualenv distro==1.8.0 -exceptiongroup==1.1.3 + # via openai +exceptiongroup==1.2.2 + # via anyio + # via pytest + # via trio +executing==2.1.0 + # via inline-snapshot filelock==3.12.4 -h11==0.12.0 -httpcore==0.15.0 -httpx==0.23.0 + # via virtualenv +h11==0.14.0 + # via httpcore +httpcore==1.0.2 + # via httpx +httpx==0.28.1 + # via openai + # via respx idna==3.4 + # via anyio + # via httpx + # via requests + # via trio +importlib-metadata==7.0.0 iniconfig==2.0.0 -isort==5.10.1 -msal==1.24.1 -msal-extensions==1.0.0 -mypy==1.6.1 + # via pytest +inline-snapshot==0.10.2 +jiter==0.5.0 + # via openai +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +msal==1.31.0 + # via azure-identity + # via msal-extensions +msal-extensions==1.2.0 + # via azure-identity +mypy==1.14.1 mypy-extensions==1.0.0 + # via black + # via mypy +nest-asyncio==1.6.0 nodeenv==1.8.0 + # via pyright nox==2023.4.22 -numpy==1.26.1 +numpy==2.0.2 + # via openai + # via pandas + # via pandas-stubs +outcome==1.3.0.post0 + # via trio packaging==23.2 -pandas==2.1.1 -pandas-stubs==2.1.1.230928 -pathspec==0.11.2 + # via black + # via nox + # via pytest +pandas==2.2.3 + # via openai +pandas-stubs==2.1.4.231227 + # via openai +pathspec==0.12.1 + # via black platformdirs==3.11.0 -pluggy==1.3.0 -portalocker==2.8.2 -py==1.11.0 -pycparser==2.21 -pydantic==2.4.2 -pydantic-core==2.10.1 + # via black + # via virtualenv +pluggy==1.5.0 + # via pytest +portalocker==2.10.1 + # via msal-extensions +pycparser==2.22 + # via cffi +pydantic==2.10.3 + # via openai +pydantic-core==2.27.1 + # via pydantic +pygments==2.18.0 + # via rich pyjwt==2.8.0 -pyright==1.1.332 -pytest==7.1.1 -pytest-asyncio==0.21.1 + # via msal +pyright==1.1.399 +pytest==8.3.3 + # via pytest-asyncio +pytest-asyncio==0.24.0 python-dateutil==2.8.2 + # via pandas + # via time-machine pytz==2023.3.post1 + # via dirty-equals + # via pandas requests==2.31.0 -respx==0.19.2 -rfc3986==1.5.0 -ruff==0.0.282 + # via azure-core + # via msal +respx==0.22.0 +rich==13.7.1 + # via inline-snapshot +ruff==0.9.4 +setuptools==68.2.2 + # via nodeenv six==1.16.0 + # via asttokens + # via azure-core + # via python-dateutil sniffio==1.3.0 + # via anyio + # via openai + # via trio +sortedcontainers==2.4.0 + # via trio +sounddevice==0.5.1 + # via openai time-machine==2.9.0 -tomli==2.0.1 -tqdm==4.66.1 -types-pytz==2023.3.1.1 -types-tqdm==4.66.0.2 -typing-extensions==4.8.0 -tzdata==2023.3 -urllib3==2.0.7 +toml==0.10.2 + # via inline-snapshot +tomli==2.0.2 + # via black + # via mypy + # via pytest +tqdm==4.66.5 + # via openai +trio==0.27.0 +types-pyaudio==0.2.16.20240516 +types-pytz==2024.2.0.20241003 + # via pandas-stubs +types-toml==0.10.8.20240310 + # via inline-snapshot 
+types-tqdm==4.66.0.20240417 +typing-extensions==4.12.2 + # via azure-core + # via azure-identity + # via black + # via mypy + # via openai + # via pydantic + # via pydantic-core + # via pyright +tzdata==2024.1 + # via pandas +urllib3==2.2.1 + # via requests virtualenv==20.24.5 -# The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 + # via nox +websockets==15.0.1 + # via openai +zipp==3.17.0 + # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index be9606fc3c..467abc6e90 100644 --- a/requirements.lock +++ b/requirements.lock @@ -5,28 +5,70 @@ # pre: false # features: [] # all-features: true +# with-sources: false +# generate-hashes: false +# universal: false -e file:. annotated-types==0.6.0 -anyio==3.7.1 + # via pydantic +anyio==4.1.0 + # via httpx + # via openai certifi==2023.7.22 + # via httpcore + # via httpx +cffi==1.17.1 + # via sounddevice distro==1.8.0 -exceptiongroup==1.1.3 -h11==0.12.0 -httpcore==0.15.0 -httpx==0.23.0 + # via openai +exceptiongroup==1.2.2 + # via anyio +h11==0.14.0 + # via httpcore +httpcore==1.0.2 + # via httpx +httpx==0.28.1 + # via openai idna==3.4 -numpy==1.26.1 -pandas==2.1.1 -pandas-stubs==2.1.1.230928 -pydantic==2.4.2 -pydantic-core==2.10.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 -rfc3986==1.5.0 + # via anyio + # via httpx +jiter==0.6.1 + # via openai +numpy==2.0.2 + # via openai + # via pandas + # via pandas-stubs +pandas==2.2.3 + # via openai +pandas-stubs==2.2.2.240807 + # via openai +pycparser==2.22 + # via cffi +pydantic==2.10.3 + # via openai +pydantic-core==2.27.1 + # via pydantic +python-dateutil==2.9.0.post0 + # via pandas +pytz==2024.1 + # via pandas six==1.16.0 + # via python-dateutil sniffio==1.3.0 -tqdm==4.66.1 -types-pytz==2023.3.1.1 -typing-extensions==4.8.0 -tzdata==2023.3 + # via anyio + # via openai +sounddevice==0.5.1 + # via openai +tqdm==4.66.5 + # via openai +types-pytz==2024.2.0.20241003 + # via pandas-stubs +typing-extensions==4.12.2 + # via openai + # via pydantic + # via pydantic-core +tzdata==2024.1 + # via pandas +websockets==15.0.1 + # via openai diff --git a/scripts/bootstrap b/scripts/bootstrap new file mode 100755 index 0000000000..9910ec05fc --- /dev/null +++ b/scripts/bootstrap @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +if ! command -v rye >/dev/null 2>&1 && [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then + brew bundle check >/dev/null 2>&1 || { + echo "==> Installing Homebrew dependencies…" + brew bundle + } +fi + +echo "==> Installing Python dependencies…" + +# experimental uv support makes installations significantly faster +rye config --set-bool behavior.use-uv=true + +rye sync diff --git a/scripts/format b/scripts/format new file mode 100755 index 0000000000..667ec2d7af --- /dev/null +++ b/scripts/format @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +echo "==> Running formatters" +rye run format diff --git a/scripts/lint b/scripts/lint new file mode 100755 index 0000000000..55bc1dd711 --- /dev/null +++ b/scripts/lint @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +echo "==> Running lints" +rye run lint + +echo "==> Making sure it imports" +rye run python -c 'import openai' diff --git a/scripts/mock b/scripts/mock new file mode 100755 index 0000000000..d2814ae6a0 --- /dev/null +++ b/scripts/mock @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." 
+ +if [[ -n "$1" && "$1" != '--'* ]]; then + URL="$1" + shift +else + URL="$(grep 'openapi_spec_url' .stats.yml | cut -d' ' -f2)" +fi + +# Check if the URL is empty +if [ -z "$URL" ]; then + echo "Error: No OpenAPI spec path/url provided or found in .stats.yml" + exit 1 +fi + +echo "==> Starting mock server with URL ${URL}" + +# Run prism mock on the given spec +if [ "$1" == "--daemon" ]; then + npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" &> .prism.log & + + # Wait for server to come online + echo -n "Waiting for server" + while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do + echo -n "." + sleep 0.1 + done + + if grep -q "✖ fatal" ".prism.log"; then + cat .prism.log + exit 1 + fi + + echo +else + npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" +fi diff --git a/scripts/test b/scripts/test new file mode 100755 index 0000000000..2b87845670 --- /dev/null +++ b/scripts/test @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color + +function prism_is_running() { + curl --silent "http://localhost:4010/" >/dev/null 2>&1 +} + +kill_server_on_port() { + pids=$(lsof -t -i tcp:"$1" || echo "") + if [ "$pids" != "" ]; then + kill "$pids" + echo "Stopped $pids." + fi +} + +function is_overriding_api_base_url() { + [ -n "$TEST_API_BASE_URL" ] +} + +if ! is_overriding_api_base_url && ! prism_is_running ; then + # When we exit this script, make sure to kill the background mock server process + trap 'kill_server_on_port 4010' EXIT + + # Start the dev server + ./scripts/mock --daemon +fi + +if is_overriding_api_base_url ; then + echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" + echo +elif ! prism_is_running ; then + echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" + echo -e "running against your OpenAPI spec." 
+ echo + echo -e "To run the server, pass in the path or url of your OpenAPI" + echo -e "spec to the prism command:" + echo + echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" + echo + + exit 1 +else + echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" + echo +fi + +export DEFER_PYDANTIC_BUILD=false + +echo "==> Running tests" +rye run pytest "$@" + +echo "==> Running Pydantic v1 tests" +rye run nox -s test-pydantic-v1 -- "$@" diff --git a/bin/blacken-docs.py b/scripts/utils/ruffen-docs.py similarity index 51% rename from bin/blacken-docs.py rename to scripts/utils/ruffen-docs.py index 45d0ad1225..0cf2bd2fd9 100644 --- a/bin/blacken-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -1,16 +1,14 @@ -# fork of https://github.com/asottile/blacken-docs implementing https://github.com/asottile/blacken-docs/issues/170 +# fork of https://github.com/asottile/blacken-docs adapted for ruff from __future__ import annotations import re +import sys import argparse import textwrap import contextlib +import subprocess from typing import Match, Optional, Sequence, Generator, NamedTuple, cast -import black -from black.mode import TargetVersion -from black.const import DEFAULT_LINE_LENGTH - MD_RE = re.compile( r"(?P^(?P *)```\s*python\n)" r"(?P.*?)" r"(?P^(?P=indent)```\s*$)", re.DOTALL | re.MULTILINE, @@ -19,55 +17,12 @@ r"(?P^(?P *)```\s*pycon\n)" r"(?P.*?)" r"(?P^(?P=indent)```.*$)", re.DOTALL | re.MULTILINE, ) -RST_PY_LANGS = frozenset(("python", "py", "sage", "python3", "py3", "numpy")) -BLOCK_TYPES = "(code|code-block|sourcecode|ipython)" -DOCTEST_TYPES = "(testsetup|testcleanup|testcode)" -RST_RE = re.compile( - rf"(?P" - rf"^(?P *)\.\. (" - rf"jupyter-execute::|" - rf"{BLOCK_TYPES}:: (?P\w+)|" - rf"{DOCTEST_TYPES}::.*" - rf")\n" - rf"((?P=indent) +:.*\n)*" - rf"\n*" - rf")" - rf"(?P(^((?P=indent) +.*)?\n)+)", - re.MULTILINE, -) -RST_PYCON_RE = re.compile( - r"(?P" - r"(?P *)\.\. ((code|code-block):: pycon|doctest::.*)\n" - r"((?P=indent) +:.*\n)*" - r"\n*" - r")" - r"(?P(^((?P=indent) +.*)?(\n|$))+)", - re.MULTILINE, -) PYCON_PREFIX = ">>> " PYCON_CONTINUATION_PREFIX = "..." 
PYCON_CONTINUATION_RE = re.compile( rf"^{re.escape(PYCON_CONTINUATION_PREFIX)}( |$)", ) -LATEX_RE = re.compile( - r"(?P^(?P *)\\begin{minted}{python}\n)" - r"(?P.*?)" - r"(?P^(?P=indent)\\end{minted}\s*$)", - re.DOTALL | re.MULTILINE, -) -LATEX_PYCON_RE = re.compile( - r"(?P^(?P *)\\begin{minted}{pycon}\n)" r"(?P.*?)" r"(?P^(?P=indent)\\end{minted}\s*$)", - re.DOTALL | re.MULTILINE, -) -PYTHONTEX_LANG = r"(?Ppyblock|pycode|pyconsole|pyverbatim)" -PYTHONTEX_RE = re.compile( - rf"(?P^(?P *)\\begin{{{PYTHONTEX_LANG}}}\n)" - rf"(?P.*?)" - rf"(?P^(?P=indent)\\end{{(?P=lang)}}\s*$)", - re.DOTALL | re.MULTILINE, -) -INDENT_RE = re.compile("^ +(?=[^ ])", re.MULTILINE) -TRAILING_NL_RE = re.compile(r"\n+\Z", re.MULTILINE) +DEFAULT_LINE_LENGTH = 100 class CodeBlockError(NamedTuple): @@ -77,7 +32,6 @@ class CodeBlockError(NamedTuple): def format_str( src: str, - black_mode: black.FileMode, ) -> tuple[str, Sequence[CodeBlockError]]: errors: list[CodeBlockError] = [] @@ -91,23 +45,9 @@ def _collect_error(match: Match[str]) -> Generator[None, None, None]: def _md_match(match: Match[str]) -> str: code = textwrap.dedent(match["code"]) with _collect_error(match): - code = black.format_str(code, mode=black_mode) + code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' - - def _rst_match(match: Match[str]) -> str: - lang = match["lang"] - if lang is not None and lang not in RST_PY_LANGS: - return match[0] - min_indent = min(INDENT_RE.findall(match["code"])) - trailing_ws_match = TRAILING_NL_RE.search(match["code"]) - assert trailing_ws_match - trailing_ws = trailing_ws_match.group() - code = textwrap.dedent(match["code"]) - with _collect_error(match): - code = black.format_str(code, mode=black_mode) - code = textwrap.indent(code, min_indent) - return f'{match["before"]}{code.rstrip()}{trailing_ws}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -119,7 +59,7 @@ def finish_fragment() -> None: if fragment is not None: with _collect_error(match): - fragment = black.format_str(fragment, mode=black_mode) + fragment = format_code_block(fragment) fragment_lines = fragment.splitlines() code += f"{PYCON_PREFIX}{fragment_lines[0]}\n" for line in fragment_lines[1:]: @@ -157,44 +97,35 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' - - def _rst_pycon_match(match: Match[str]) -> str: - code = _pycon_match(match) - min_indent = min(INDENT_RE.findall(match["code"])) - code = textwrap.indent(code, min_indent) - return f'{match["before"]}{code}' - - def _latex_match(match: Match[str]) -> str: - code = textwrap.dedent(match["code"]) - with _collect_error(match): - code = black.format_str(code, mode=black_mode) - code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' - - def _latex_pycon_match(match: Match[str]) -> str: - code = _pycon_match(match) - code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) - src = RST_RE.sub(_rst_match, src) - src = RST_PYCON_RE.sub(_rst_pycon_match, src) - src = LATEX_RE.sub(_latex_match, src) - src = LATEX_PYCON_RE.sub(_latex_pycon_match, src) - src = PYTHONTEX_RE.sub(_latex_match, src) return 
src, errors +def format_code_block(code: str) -> str: + return subprocess.check_output( + [ + sys.executable, + "-m", + "ruff", + "format", + "--stdin-filename=script.py", + f"--line-length={DEFAULT_LINE_LENGTH}", + ], + encoding="utf-8", + input=code, + ) + + def format_file( filename: str, - black_mode: black.FileMode, skip_errors: bool, ) -> int: with open(filename, encoding="UTF-8") as f: contents = f.read() - new_contents, errors = format_str(contents, black_mode) + new_contents, errors = format_str(contents) for error in errors: lineno = contents[: error.offset].count("\n") + 1 print(f"{filename}:{lineno}: code block parse error {error.exc}") @@ -217,15 +148,6 @@ def main(argv: Sequence[str] | None = None) -> int: type=int, default=DEFAULT_LINE_LENGTH, ) - parser.add_argument( - "-t", - "--target-version", - action="http://github.com/append", - type=lambda v: TargetVersion[v.upper()], - default=[], - help=f"choices: {[v.name.lower() for v in TargetVersion]}", - dest="target_versions", - ) parser.add_argument( "-S", "--skip-string-normalization", @@ -235,15 +157,9 @@ def main(argv: Sequence[str] | None = None) -> int: parser.add_argument("filenames", nargs="*") args = parser.parse_args(argv) - black_mode = black.FileMode( - target_versions=set(args.target_versions), - line_length=args.line_length, - string_normalization=not args.skip_string_normalization, - ) - retv = 0 for filename in args.filenames: - retv |= format_file(filename, black_mode, skip_errors=args.skip_errors) + retv |= format_file(filename, skip_errors=args.skip_errors) return retv diff --git a/src/openai/__init__.py b/src/openai/__init__.py index d92dfe969a..6b21a9af23 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -1,25 +1,19 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os as _os +import typing as _t from typing_extensions import override from . 
import types -from ._types import NoneType, Transport, ProxiesTypes +from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path -from ._client import ( - Client, - OpenAI, - Stream, - Timeout, - Transport, - AsyncClient, - AsyncOpenAI, - AsyncStream, - RequestOptions, -) +from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions +from ._models import BaseModel from ._version import __title__, __version__ +from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse +from ._constants import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS from ._exceptions import ( APIError, OpenAIError, @@ -33,10 +27,14 @@ AuthenticationError, InternalServerError, PermissionDeniedError, + LengthFinishReasonError, UnprocessableEntityError, APIResponseValidationError, + ContentFilterFinishReasonError, ) +from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging +from ._legacy_response import HttpxBinaryResponseContent as HttpxBinaryResponseContent __all__ = [ "types", @@ -45,6 +43,9 @@ "NoneType", "Transport", "ProxiesTypes", + "NotGiven", + "NOT_GIVEN", + "Omit", "OpenAIError", "APIError", "APIStatusError", @@ -59,6 +60,8 @@ "UnprocessableEntityError", "RateLimitError", "InternalServerError", + "LengthFinishReasonError", + "ContentFilterFinishReasonError", "Timeout", "RequestOptions", "Client", @@ -68,13 +71,25 @@ "OpenAI", "AsyncOpenAI", "file_from_path", + "BaseModel", + "DEFAULT_TIMEOUT", + "DEFAULT_MAX_RETRIES", + "DEFAULT_CONNECTION_LIMITS", + "DefaultHttpxClient", + "DefaultAsyncHttpxClient", ] -from .lib import azure as _azure +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources + +from .lib import azure as _azure, pydantic_function_tool as pydantic_function_tool from .version import VERSION as VERSION -from .lib.azure import AzureOpenAI as AzureOpenAI -from .lib.azure import AsyncAzureOpenAI as AsyncAzureOpenAI +from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI from .lib._old_api import * +from .lib.streaming import ( + AssistantEventHandler as AssistantEventHandler, + AsyncAssistantEventHandler as AsyncAssistantEventHandler, +) _setup_logging() @@ -86,7 +101,7 @@ for __name in __all__: if not __name.startswith("__"): try: - setattr(__locals[__name], "__module__", "openai") + __locals[__name].__module__ = "openai" except (TypeError, AttributeError): # Some of our exported symbols are builtins which we can't set attributes for. pass @@ -103,6 +118,8 @@ organization: str | None = None +project: str | None = None + base_url: str | _httpx.URL | None = None timeout: float | Timeout | None = DEFAULT_TIMEOUT @@ -154,6 +171,17 @@ def organization(self, value: str | None) -> None: # type: ignore organization = value + @property # type: ignore + @override + def project(self) -> str | None: + return project + + @project.setter # type: ignore + def project(self, value: str | None) -> None: # type: ignore + global project + + project = value + @property @override def base_url(http://github.com/self) -> _httpx.URL: @@ -221,13 +249,6 @@ def _client(self, value: _httpx.Client) -> None: # type: ignore http_client = value - @override - def __del__(self) -> None: - try: - super().__del__() - except Exception: - pass - class _AzureModuleClient(_ModuleClient, AzureOpenAI): # type: ignore ... 
@@ -312,6 +333,7 @@ def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction] _client = _ModuleClient( api_key=api_key, organization=organization, + project=project, base_url=base_url, timeout=timeout, max_retries=max_retries, @@ -330,15 +352,20 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction] _client = None -from ._module_client import beta as beta -from ._module_client import chat as chat -from ._module_client import audio as audio -from ._module_client import edits as edits -from ._module_client import files as files -from ._module_client import images as images -from ._module_client import models as models -from ._module_client import embeddings as embeddings -from ._module_client import fine_tunes as fine_tunes -from ._module_client import completions as completions -from ._module_client import fine_tuning as fine_tuning -from ._module_client import moderations as moderations +from ._module_client import ( + beta as beta, + chat as chat, + audio as audio, + evals as evals, + files as files, + images as images, + models as models, + batches as batches, + uploads as uploads, + responses as responses, + embeddings as embeddings, + completions as completions, + fine_tuning as fine_tuning, + moderations as moderations, + vector_stores as vector_stores, +) diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index a168301f75..a0f9cce7d8 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -1,14 +1,14 @@ from __future__ import annotations -import os +import sys import json import time import uuid import email +import asyncio import inspect import logging import platform -import warnings import email.utils from types import TracebackType from random import random @@ -29,14 +29,13 @@ cast, overload, ) -from functools import lru_cache -from typing_extensions import Literal, override +from typing_extensions import Literal, override, get_origin import anyio import httpx import distro import pydantic -from httpx import URL, Limits +from httpx import URL from pydantic import PrivateAttr from . 
import _exceptions @@ -47,41 +46,47 @@ Body, Omit, Query, - ModelT, Headers, Timeout, NotGiven, ResponseT, - Transport, AnyMapping, PostParser, - ProxiesTypes, RequestFiles, - AsyncTransport, + HttpxSendArgs, RequestOptions, - UnknownResponse, + HttpxRequestFiles, ModelBuilderProtocol, - BinaryResponseContent, ) -from ._utils import is_dict, is_given, is_mapping -from ._compat import model_copy, model_dump +from ._utils import SensitiveHeadersFilter, is_dict, is_list, asyncify, is_given, lru_cache, is_mapping +from ._compat import PYDANTIC_V2, model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type -from ._response import APIResponse +from ._response import ( + APIResponse, + BaseAPIResponse, + AsyncAPIResponse, + extract_response_type, +) from ._constants import ( - DEFAULT_LIMITS, DEFAULT_TIMEOUT, + MAX_RETRY_DELAY, DEFAULT_MAX_RETRIES, + INITIAL_RETRY_DELAY, RAW_RESPONSE_HEADER, + OVERRIDE_CAST_TO_HEADER, + DEFAULT_CONNECTION_LIMITS, ) -from ._streaming import Stream, AsyncStream +from ._streaming import Stream, SSEDecoder, AsyncStream, SSEBytesDecoder from ._exceptions import ( APIStatusError, APITimeoutError, APIConnectionError, APIResponseValidationError, ) +from ._legacy_response import LegacyAPIResponse log: logging.Logger = logging.getLogger(__name__) +log.addFilter(SensitiveHeadersFilter()) # TODO: make base page type vars covariant SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]") @@ -95,7 +100,11 @@ _AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any]) if TYPE_CHECKING: - from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + from httpx._config import ( + DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage] + ) + + HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG else: try: from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT @@ -105,41 +114,57 @@ class PageInfo: - """Stores the necesary information to build the request to retrieve the next page. + """Stores the necessary information to build the request to retrieve the next page. Either `url` or `params` must be set. """ url: URL | NotGiven params: Query | NotGiven + json: Body | NotGiven @overload def __init__( self, *, url: URL, - ) -> None: - ... + ) -> None: ... @overload def __init__( self, *, params: Query, - ) -> None: - ... + ) -> None: ... + + @overload + def __init__( + self, + *, + json: Body, + ) -> None: ... def __init__( self, *, url: URL | NotGiven = NOT_GIVEN, + json: Body | NotGiven = NOT_GIVEN, params: Query | NotGiven = NOT_GIVEN, ) -> None: self.url = url + self.json = json self.params = params + @override + def __repr__(self) -> str: + if self.url: + return f"{self.__class__.__name__}(url={self.url})" + if self.json: + return f"{self.__class__.__name__}(json={self.json})" + return f"{self.__class__.__name__}(params={self.params})" + -class BasePage(GenericModel, Generic[ModelT]): +class BasePage(GenericModel, Generic[_T]): """ Defines the core interface for pagination. @@ -152,7 +177,7 @@ class BasePage(GenericModel, Generic[ModelT]): """ _options: FinalRequestOptions = PrivateAttr() - _model: Type[ModelT] = PrivateAttr() + _model: Type[_T] = PrivateAttr() def has_next_page(self) -> bool: items = self._get_page_items() @@ -160,10 +185,9 @@ def has_next_page(self) -> bool: return False return self.next_page_info() is not None - def next_page_info(self) -> Optional[PageInfo]: - ... + def next_page_info(self) -> Optional[PageInfo]: ... 
- def _get_page_items(self) -> Iterable[ModelT]: # type: ignore[empty-body] + def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] ... def _params_from_url(http://github.com/self,%20url:%20URL) -> httpx.QueryParams: @@ -185,18 +209,34 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: options.url = str(url) return options + if not isinstance(info.json, NotGiven): + if not is_mapping(info.json): + raise TypeError("Pagination is only supported with mappings") + + if not options.json_data: + options.json_data = {**info.json} + else: + if not is_mapping(options.json_data): + raise TypeError("Pagination is only supported with mappings") + + options.json_data = {**options.json_data, **info.json} + return options + raise ValueError("Unexpected PageInfo state") -class BaseSyncPage(BasePage[ModelT], Generic[ModelT]): +class BaseSyncPage(BasePage[_T], Generic[_T]): _client: SyncAPIClient = pydantic.PrivateAttr() def _set_private_attributes( self, client: SyncAPIClient, - model: Type[ModelT], + model: Type[_T], options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -209,7 +249,7 @@ def _set_private_attributes( # methods should continue to work as expected as there is an alternative method # to cast a model to a dictionary, model.dict(), which is used internally # by pydantic. - def __iter__(self) -> Iterator[ModelT]: # type: ignore + def __iter__(self) -> Iterator[_T]: # type: ignore for page in self.iter_pages(): for item in page._get_page_items(): yield item @@ -234,13 +274,13 @@ def get_next_page(self: SyncPageT) -> SyncPageT: return self._client._request_api_list(self._model, page=self.__class__, options=options) -class AsyncPaginator(Generic[ModelT, AsyncPageT]): +class AsyncPaginator(Generic[_T, AsyncPageT]): def __init__( self, client: AsyncAPIClient, options: FinalRequestOptions, page_cls: Type[AsyncPageT], - model: Type[ModelT], + model: Type[_T], ) -> None: self._model = model self._client = client @@ -263,7 +303,7 @@ def _parser(resp: AsyncPageT) -> AsyncPageT: return await self._client.request(self._page_cls, self._options) - async def __aiter__(self) -> AsyncIterator[ModelT]: + async def __aiter__(self) -> AsyncIterator[_T]: # https://github.com/microsoft/pyright/issues/3464 page = cast( AsyncPageT, @@ -273,20 +313,23 @@ async def __aiter__(self) -> AsyncIterator[ModelT]: yield item -class BaseAsyncPage(BasePage[ModelT], Generic[ModelT]): +class BaseAsyncPage(BasePage[_T], Generic[_T]): _client: AsyncAPIClient = pydantic.PrivateAttr() def _set_private_attributes( self, - model: Type[ModelT], + model: Type[_T], client: AsyncAPIClient, options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options - async def __aiter__(self) -> AsyncIterator[ModelT]: + async def __aiter__(self) -> AsyncIterator[_T]: async for page in self.iter_pages(): for item in page._get_page_items(): yield item @@ -321,9 +364,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): _base_url: URL max_retries: int timeout: Union[float, Timeout, None] - _limits: httpx.Limits - _proxies: ProxiesTypes | None - _transport: Transport | AsyncTransport | None _strict_response_validation: bool _idempotency_header: str | None _default_stream_cls: 
type[_DefaultStreamT] | None = None @@ -336,9 +376,6 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None = DEFAULT_TIMEOUT, - limits: httpx.Limits, - transport: Transport | AsyncTransport | None, - proxies: ProxiesTypes | None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: @@ -346,13 +383,16 @@ def __init__( self._base_url = self._enforce_trailing_slash(URL(base_url)) self.max_retries = max_retries self.timeout = timeout - self._limits = limits - self._proxies = proxies - self._transport = transport self._custom_headers = custom_headers or {} self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation self._idempotency_header = None + self._platform: Platform | None = None + + if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] + raise TypeError( + "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `openai.DEFAULT_MAX_RETRIES`" + ) def _enforce_trailing_slash(self, url: URL) -> URL: if url.raw_path.endswith(b"/"): @@ -363,14 +403,21 @@ def _make_status_error_from_response( self, response: httpx.Response, ) -> APIStatusError: - err_text = response.text.strip() - body = err_text + if response.is_closed and not response.is_stream_consumed: + # We can't read the response body as it has been closed + # before it was read. This can happen if an event hook + # raises a status error. + body = None + err_msg = f"Error code: {response.status_code}" + else: + err_text = response.text.strip() + body = err_text - try: - body = json.loads(err_text) - err_msg = f"Error code: {response.status_code} - {body}" - except Exception: - err_msg = err_text or f"Error code: {response.status_code}" + try: + body = json.loads(err_text) + err_msg = f"Error code: {response.status_code} - {body}" + except Exception: + err_msg = err_text or f"Error code: {response.status_code}" return self._make_status_error(err_msg, body=body, response=response) @@ -383,27 +430,30 @@ def _make_status_error( ) -> _exceptions.APIStatusError: raise NotImplementedError() - def _remaining_retries( - self, - remaining_retries: Optional[int], - options: FinalRequestOptions, - ) -> int: - return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries) - - def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: + def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers: custom_headers = options.headers or {} headers_dict = _merge_mappings(self.default_headers, custom_headers) self._validate_headers(headers_dict, custom_headers) + # headers are case-insensitive while dictionaries are not. headers = httpx.Headers(headers_dict) idempotency_header = self._idempotency_header - if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: - if not options.idempotency_key: - options.idempotency_key = self._idempotency_key() - + if idempotency_header and options.idempotency_key and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key + # Don't set these headers if they were already set or removed by the caller. We check + # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. 
+ lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: + headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) + return headers def _prepare_url(http://github.com/self,%20url:%20str) -> URL: @@ -419,9 +469,14 @@ def _prepare_url(http://github.com/self,%20url:%20str) -> URL: return merge_url + def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder: + return SSEDecoder() + def _build_request( self, options: FinalRequestOptions, + *, + retries_taken: int = 0, ) -> httpx.Request: if log.isEnabledFor(logging.DEBUG): log.debug("Request options: %s", model_dump(options, exclude_unset=True)) @@ -437,19 +492,24 @@ def _build_request( else: raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") - headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + headers = self._build_headers(options, retries_taken=retries_taken) + params = _merge_mappings(self.default_query, options.params) + content_type = headers.get("Content-Type") + files = options.files # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with # additional information for us as it has to be in this form # for the server to be able to correctly parse the request: # multipart/form-data; boundary=---abc-- - if headers.get("Content-Type") == "multipart/form-data": - headers.pop("Content-Type") + if content_type is not None and content_type.startswith("multipart/form-data"): + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json - # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding if json_data: if not is_dict(json_data): raise TypeError( @@ -457,19 +517,33 @@ def _build_request( ) kwargs["data"] = self._serialize_multipartform(json_data) + # httpx determines whether or not to send a "multipart/form-data" + # request based on the truthiness of the "files" argument. + # This gets around that issue by generating a dict value that + # evaluates to true. 
+ # + # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186 + if not files: + files = cast(HttpxRequestFiles, ForceMultipartDict()) + + prepared_url = self._prepare_url(http://github.com/options.url) + if "_" in prepared_url.host: + # work around https://github.com/encode/httpx/discussions/2880 + kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout, method=options.method, - url=self._prepare_url(http://github.com/options.url), + url=prepared_url, # the `Query` type that we use is incompatible with qs' # `Params` type as it needs to be typed as `Mapping[str, object]` # so that passing a `TypedDict` doesn't cause an error. # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data, - files=options.files, + json=json_data if is_given(json_data) else None, + files=files, **kwargs, ) @@ -482,33 +556,46 @@ def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, o ) serialized: dict[str, object] = {} for key, value in items: - if key in serialized: - raise ValueError(f"Duplicate key encountered: {key}; This behaviour is not supported") - serialized[key] = value + existing = serialized.get(key) + + if not existing: + serialized[key] = value + continue + + # If a value has already been set for this key then that + # means we're sending data like `array[]=[1, 2, 3]` and we + # need to tell httpx that we want to send multiple values with + # the same key which is done by using a list or a tuple. + # + # Note: 2d arrays should never result in the same key at both + # levels so it's safe to assume that if the value is a list, + # it was because we changed it to be a list. 
+ if is_list(existing): + existing.append(value) + else: + serialized[key] = [existing, value] + return serialized - def _process_response( - self, - *, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - response: httpx.Response, - stream: bool, - stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, - ) -> ResponseT: - api_response = APIResponse( - raw=response, - client=self, - cast_to=cast_to, - stream=stream, - stream_cls=stream_cls, - options=options, - ) + def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]: + if not is_given(options.headers): + return cast_to - if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": - return cast(ResponseT, api_response) + # make a copy of the headers so we don't mutate user-input + headers = dict(options.headers) - return api_response.parse() + # we internally support defining a temporary header to override the + # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response` + # see _response.py for implementation details + override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN) + if is_given(override_cast_to): + options.headers = headers + return cast(Type[ResponseT], override_cast_to) + + return cast_to + + def _should_stream_response_body(self, request: httpx.Request) -> bool: + return request.headers.get(RAW_RESPONSE_HEADER) == "stream" # type: ignore[no-any-return] def _process_response_data( self, @@ -520,7 +607,7 @@ def _process_response_data( if data is None: return cast(ResponseT, None) - if cast_to is UnknownResponse: + if cast_to is object: return cast(ResponseT, data) try: @@ -557,6 +644,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 @@ -578,18 +671,47 @@ def base_url(http://github.com/self) -> URL: @base_url.setter def base_url(http://github.com/self,%20url:%20URL%20|%20str) -> None: - self._client.base_url = url if isinstance(url, URL) else URL(url) + self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url)) - @lru_cache(maxsize=None) def platform_headers(self) -> Dict[str, str]: - return { - "X-Stainless-Lang": "python", - "X-Stainless-Package-Version": self._version, - "X-Stainless-OS": str(get_platform()), - "X-Stainless-Arch": str(get_architecture()), - "X-Stainless-Runtime": platform.python_implementation(), - "X-Stainless-Runtime-Version": platform.python_version(), - } + # the actual implementation is in a separate `lru_cache` decorated + # function because adding `lru_cache` to methods will leak memory + # https://github.com/python/cpython/issues/88476 + return platform_headers(self._version, platform=self._platform) + + def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None: + """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. 
+ + About the Retry-After header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After + See also https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax + """ + if response_headers is None: + return None + + # First, try the non-standard `retry-after-ms` header for milliseconds, + # which is more precise than integer-seconds `retry-after` + try: + retry_ms_header = response_headers.get("retry-after-ms", None) + return float(retry_ms_header) / 1000 + except (TypeError, ValueError): + pass + + # Next, try parsing `retry-after` header as seconds (allowing nonstandard floats). + retry_header = response_headers.get("retry-after") + try: + # note: the spec indicates that this should only ever be an integer + # but if someone sends a float there's no reason for us to not respect it + return float(retry_header) + except (TypeError, ValueError): + pass + + # Last, try parsing `retry-after` as a date. + retry_date_tuple = email.utils.parsedate_tz(retry_header) + if retry_date_tuple is None: + return None + + retry_date = email.utils.mktime_tz(retry_date_tuple) + return float(retry_date - time.time()) def _calculate_retry_timeout( self, @@ -598,38 +720,17 @@ def _calculate_retry_timeout( response_headers: Optional[httpx.Headers] = None, ) -> float: max_retries = options.get_max_retries(self.max_retries) - try: - # About the Retry-After header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After - # - # ". See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax for - # details. - if response_headers is not None: - retry_header = response_headers.get("retry-after") - try: - retry_after = int(retry_header) - except Exception: - retry_date_tuple = email.utils.parsedate_tz(retry_header) - if retry_date_tuple is None: - retry_after = -1 - else: - retry_date = email.utils.mktime_tz(retry_date_tuple) - retry_after = int(retry_date - time.time()) - else: - retry_after = -1 - - except Exception: - retry_after = -1 # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says. - if 0 < retry_after <= 60: + retry_after = self._parse_retry_after_header(response_headers) + if retry_after is not None and 0 < retry_after <= 60: return retry_after - initial_retry_delay = 0.5 - max_retry_delay = 8.0 - nb_retries = max_retries - remaining_retries + # Also cap retry count to 1000 to avoid any potential overflows with `pow` + nb_retries = min(max_retries - remaining_retries, 1000) # Apply exponential backoff, but not more than the max. - sleep_seconds = min(initial_retry_delay * pow(2.0, nb_retries), max_retry_delay) + sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) # Apply some jitter, plus-or-minus half a second. jitter = 1 - 0.25 * random() @@ -642,35 +743,72 @@ def _should_retry(self, response: httpx.Response) -> bool: # If the server explicitly says whether or not to retry, obey. if should_retry_header == "true": + log.debug("Retrying as header `x-should-retry` is set to `true`") return True if should_retry_header == "false": + log.debug("Not retrying as header `x-should-retry` is set to `false`") return False # Retry on request timeouts. if response.status_code == 408: + log.debug("Retrying due to status code %i", response.status_code) return True # Retry on lock timeouts. if response.status_code == 409: + log.debug("Retrying due to status code %i", response.status_code) return True # Retry on rate limits. 
if response.status_code == 429: + log.debug("Retrying due to status code %i", response.status_code) return True # Retry internal errors. if response.status_code >= 500: + log.debug("Retrying due to status code %i", response.status_code) return True + log.debug("Not retrying") return False def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" +class _DefaultHttpxClient(httpx.Client): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultHttpxClient = httpx.Client + """An alias to `httpx.Client` that provides the same defaults that this SDK + uses internally. + + This is useful because overriding the `http_client` with your own instance of + `httpx.Client` will result in httpx's defaults being used, not ours. + """ +else: + DefaultHttpxClient = _DefaultHttpxClient + + +class SyncHttpxClientWrapper(DefaultHttpxClient): + def __del__(self) -> None: + if self.is_closed: + return + + try: + self.close() + except Exception: + pass + + class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]): _client: httpx.Client - _has_custom_http_client: bool _default_stream_cls: type[Stream[Any]] | None = None def __init__( @@ -680,43 +818,11 @@ def __init__( base_url: str | URL, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: Transport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. 
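# A standalone sketch of the retry-wait policy from the hunks above. The constants,
# the 60-second "reasonable Retry-After" cap and the jitter mirror the patch; the
# header values below are hypothetical and this is a simplified restatement, not
# the SDK's actual code path.
from __future__ import annotations

import email.utils
import time
from random import random

INITIAL_RETRY_DELAY = 0.5  # seconds; doubled on every retry
MAX_RETRY_DELAY = 8.0      # ceiling for the exponential backoff


def parse_retry_after(headers: dict[str, str]) -> float | None:
    # Prefer the non-standard `retry-after-ms` header (milliseconds, more precise).
    try:
        return float(headers["retry-after-ms"]) / 1000
    except (KeyError, ValueError):
        pass

    retry_header = headers.get("retry-after")
    try:
        # The spec says integer seconds, but floats are respected if a server sends them.
        return float(retry_header)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        pass

    # Fall back to the HTTP-date form of `Retry-After`.
    retry_date_tuple = email.utils.parsedate_tz(retry_header) if retry_header else None
    if retry_date_tuple is None:
        return None
    return float(email.utils.mktime_tz(retry_date_tuple) - time.time())


def retry_timeout(nb_retries: int, headers: dict[str, str]) -> float:
    retry_after = parse_retry_after(headers)
    if retry_after is not None and 0 < retry_after <= 60:
        return retry_after  # obey the server when the requested wait is reasonable

    sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
    return sleep_seconds * (1 - 0.25 * random())  # shave off up to 25% as jitter


# retry_timeout(0, {}) is roughly 0.38-0.5s, retry_timeout(4, {}) roughly 6-8s, and
# retry_timeout(0, {"retry-after-ms": "1500"}) is exactly 1.5s.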
@@ -730,28 +836,26 @@ def __init__( else: timeout = DEFAULT_TIMEOUT + if http_client is not None and not isinstance(http_client, httpx.Client): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.Client` but got {type(http_client)}" + ) + super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, _strict_response_validation=_strict_response_validation, ) - self._client = http_client or httpx.Client( + self._client = http_client or SyncHttpxClientWrapper( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, ) - self._has_custom_http_client = bool(http_client) def is_closed(self) -> bool: return self._client.is_closed @@ -780,9 +884,9 @@ def __exit__( def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options def _prepare_request( self, @@ -800,147 +904,225 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[True], stream_cls: Type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: Type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... 
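    # Illustrative call shapes for the overloads above (the response types are
    # hypothetical): `self.request(cast_to=Completion, options=opts)` resolves to
    # the non-streaming overload and returns a `Completion`, while adding
    # `stream=True, stream_cls=Stream[Completion]` resolves to the streaming
    # overload and returns a `Stream[Completion]`. The old `remaining_retries`
    # parameter is gone because the retry loop below tracks attempts itself.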
def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: type[_StreamT] | None = None, ) -> ResponseT | _StreamT: - return self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - remaining_retries=remaining_retries, - ) - - def _request( - self, - *, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - remaining_retries: int | None, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: - self._prepare_options(options) + cast_to = self._maybe_override_cast_to(cast_to, options) + + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() + + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) + + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = self._prepare_options(options) + + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + self._prepare_request(request) + + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth + + log.debug("Sending HTTP Request: %s %s", request.method, request.url) + + response = None + try: + response = self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, + ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) - self._prepare_request(request) + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err - try: - response = self._client.send(request, auth=self.custom_auth, stream=stream) log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, ) - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - if retries > 0 and self._should_retry(err.response): - return self._retry_request( - options, - cast_to, - retries, - err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + log.debug("request_id: %s", response.headers.get("x-request-id")) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 
0 and self._should_retry(err.response): + err.response.close() + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. - err.response.read() - raise self._make_status_error_from_response(err.response) from None - except httpx.TimeoutException as err: - if retries > 0: - return self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - ) - raise APITimeoutError(request=request) from err - except Exception as err: - if retries > 0: - return self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - ) - raise APIConnectionError(request=request) from err + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() + + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + break + + assert response is not None, "could not resolve response (should never happen)" return self._process_response( cast_to=cast_to, options=options, response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) - def _retry_request( + def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken + if remaining_retries == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining_retries) + + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + time.sleep(timeout) + + def _process_response( self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: Optional[httpx.Headers] = None, *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: - remaining = remaining_retries - 1 - timeout = self._calculate_retry_timeout(remaining, options, response_headers) - log.info("Retrying request to %s in %f seconds", options.url, timeout) + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, + ) -> ResponseT: + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_to=cast_to, + stream=stream, + stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, + ), + ) - # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a - # different thread if necessary. 
- time.sleep(timeout) + origin = get_origin(cast_to) or cast_to - return self._request( - options=options, - cast_to=cast_to, - remaining_retries=remaining, + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, APIResponse): + raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}") + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_to) + return cast( + ResponseT, + response_cls( + raw=response, + client=self, + cast_to=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, + ), + ) + + if cast_to == httpx.Response: + return cast(ResponseT, response) + + api_response = APIResponse( + raw=response, + client=self, + cast_to=cast("type[ResponseT]", cast_to), # pyright: ignore[reportUnnecessaryCast] stream=stream, stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return api_response.parse() def _request_api_list( self, - model: Type[ModelT], + model: Type[object], page: Type[SyncPageT], options: FinalRequestOptions, ) -> SyncPageT: @@ -964,8 +1146,7 @@ def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def get( @@ -976,8 +1157,7 @@ def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def get( @@ -988,8 +1168,7 @@ def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... def get( self, @@ -1015,8 +1194,7 @@ def post( options: RequestOptions = {}, files: RequestFiles | None = None, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def post( @@ -1029,8 +1207,7 @@ def post( files: RequestFiles | None = None, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def post( @@ -1043,8 +1220,7 @@ def post( files: RequestFiles | None = None, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... def post( self, @@ -1102,7 +1278,7 @@ def get_api_list( self, path: str, *, - model: Type[ModelT], + model: Type[object], page: Type[SyncPageT], body: Body | None = None, options: RequestOptions = {}, @@ -1112,9 +1288,40 @@ def get_api_list( return self._request_api_list(model, page, opts) +class _DefaultAsyncHttpxClient(httpx.AsyncClient): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +if TYPE_CHECKING: + DefaultAsyncHttpxClient = httpx.AsyncClient + """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK + uses internally. + + This is useful because overriding the `http_client` with your own instance of + `httpx.AsyncClient` will result in httpx's defaults being used, not ours. 
+ """ +else: + DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient + + +class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): + def __del__(self) -> None: + if self.is_closed: + return + + try: + # TODO(someday): support non asyncio runtimes here + asyncio.get_running_loop().create_task(self.aclose()) + except Exception: + pass + + class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]): _client: httpx.AsyncClient - _has_custom_http_client: bool _default_stream_cls: type[AsyncStream[Any]] | None = None def __init__( @@ -1125,42 +1332,10 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. 
@@ -1174,28 +1349,26 @@ def __init__( else: timeout = DEFAULT_TIMEOUT + if http_client is not None and not isinstance(http_client, httpx.AsyncClient): # pyright: ignore[reportUnnecessaryIsInstance] + raise TypeError( + f"Invalid `http_client` argument; Expected an instance of `httpx.AsyncClient` but got {type(http_client)}" + ) + super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, _strict_response_validation=_strict_response_validation, ) - self._client = http_client or httpx.AsyncClient( + self._client = http_client or AsyncHttpxClientWrapper( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, ) - self._has_custom_http_client = bool(http_client) def is_closed(self) -> bool: return self._client.is_closed @@ -1221,9 +1394,9 @@ async def __aexit__( async def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options async def _prepare_request( self, @@ -1243,9 +1416,7 @@ async def request( options: FinalRequestOptions, *, stream: Literal[False] = False, - remaining_retries: Optional[int] = None, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def request( @@ -1255,9 +1426,7 @@ async def request( *, stream: Literal[True], stream_cls: type[_AsyncStreamT], - remaining_retries: Optional[int] = None, - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def request( @@ -1267,9 +1436,7 @@ async def request( *, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... 
async def request( self, @@ -1278,103 +1445,202 @@ async def request( *, stream: bool = False, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - return await self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - remaining_retries=remaining_retries, - ) - - async def _request( - self, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - *, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - remaining_retries: int | None, ) -> ResponseT | _AsyncStreamT: - await self._prepare_options(options) + if self._platform is None: + # `get_platform` can make blocking IO calls so we + # execute it earlier while we are in an async context + self._platform = await asyncify(get_platform)() + + cast_to = self._maybe_override_cast_to(cast_to, options) + + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() + + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) + + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = await self._prepare_options(options) + + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + await self._prepare_request(request) + + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth + + log.debug("Sending HTTP Request: %s %s", request.method, request.url) + + response = None + try: + response = await self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, + ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) - await self._prepare_request(request) + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err - try: - response = await self._client.send(request, auth=self.custom_auth, stream=stream) log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, ) - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - if retries > 0 and self._should_retry(err.response): - return await self._retry_request( - options, - cast_to, - retries, - err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + 
log.debug("request_id: %s", response.headers.get("x-request-id")) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + await err.response.aclose() + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. - await err.response.aread() - raise self._make_status_error_from_response(err.response) from None - except httpx.ConnectTimeout as err: - if retries > 0: - return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls) - raise APITimeoutError(request=request) from err - except httpx.TimeoutException as err: - if retries > 0: - return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls) - raise APITimeoutError(request=request) from err - except Exception as err: - if retries > 0: - return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls) - raise APIConnectionError(request=request) from err + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + await err.response.aread() - return self._process_response( + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + + break + + assert response is not None, "could not resolve response (should never happen)" + return await self._process_response( cast_to=cast_to, options=options, response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) - async def _retry_request( + async def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken + if remaining_retries == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining_retries) + + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + await anyio.sleep(timeout) + + async def _process_response( self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: Optional[httpx.Headers] = None, *, + cast_to: Type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, stream: bool, - stream_cls: type[_AsyncStreamT] | None, - ) -> ResponseT | _AsyncStreamT: - remaining = remaining_retries - 1 - timeout = self._calculate_retry_timeout(remaining, options, response_headers) - log.info("Retrying request to %s in %f seconds", options.url, timeout) + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, + ) -> ResponseT: + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_to=cast_to, + stream=stream, + stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, + ), + ) - await anyio.sleep(timeout) + origin = get_origin(cast_to) or cast_to - return await self._request( - options=options, - 
cast_to=cast_to, - remaining_retries=remaining, + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, AsyncAPIResponse): + raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}") + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_to) + return cast( + "ResponseT", + response_cls( + raw=response, + client=self, + cast_to=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, + ), + ) + + if cast_to == httpx.Response: + return cast(ResponseT, response) + + api_response = AsyncAPIResponse( + raw=response, + client=self, + cast_to=cast("type[ResponseT]", cast_to), # pyright: ignore[reportUnnecessaryCast] stream=stream, stream_cls=stream_cls, + options=options, + retries_taken=retries_taken, ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return await api_response.parse() def _request_api_list( self, - model: Type[ModelT], + model: Type[_T], page: Type[AsyncPageT], options: FinalRequestOptions, - ) -> AsyncPaginator[ModelT, AsyncPageT]: + ) -> AsyncPaginator[_T, AsyncPageT]: return AsyncPaginator(client=self, options=options, page_cls=page, model=model) @overload @@ -1385,8 +1651,7 @@ async def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def get( @@ -1397,8 +1662,7 @@ async def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def get( @@ -1409,8 +1673,7 @@ async def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... async def get( self, @@ -1434,8 +1697,7 @@ async def post( files: RequestFiles | None = None, options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def post( @@ -1448,8 +1710,7 @@ async def post( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def post( @@ -1462,8 +1723,7 @@ async def post( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... 
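    # Usage sketch for the response plumbing above (the client instance is
    # hypothetical): `await client.chat.completions.with_raw_response.create(...)`
    # and `client.chat.completions.with_streaming_response.create(...)` both rely
    # on these temporary headers; the raw-response header short-circuits
    # `_process_response` into a `LegacyAPIResponse`, and the cast_to override
    # header handled by `_maybe_override_cast_to` lets those wrappers swap in
    # their own response type.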
async def post( self, @@ -1521,13 +1781,12 @@ def get_api_list( self, path: str, *, - # TODO: support paginating `str` - model: Type[ModelT], + model: Type[_T], page: Type[AsyncPageT], body: Body | None = None, options: RequestOptions = {}, method: str = "get", - ) -> AsyncPaginator[ModelT, AsyncPageT]: + ) -> AsyncPaginator[_T, AsyncPageT]: opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options) return self._request_api_list(model, page, opts) @@ -1569,6 +1828,11 @@ def make_request_options( return options +class ForceMultipartDict(Dict[str, None]): + def __bool__(self) -> bool: + return True + + class OtherPlatform: def __init__(self, name: str) -> None: self.name = name @@ -1594,8 +1858,12 @@ def __str__(self) -> str: def get_platform() -> Platform: - system = platform.system().lower() - platform_name = platform.platform().lower() + try: + system = platform.system().lower() + platform_name = platform.platform().lower() + except Exception: + return "Unknown" + if "iphone" in platform_name or "ipad" in platform_name: # Tested using Python3IDE on an iPhone 11 and Pythonista on an iPad 7 # system is Darwin and platform_name is a string like: @@ -1631,6 +1899,18 @@ def get_platform() -> Platform: return "Unknown" +@lru_cache(maxsize=None) +def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]: + return { + "X-Stainless-Lang": "python", + "X-Stainless-Package-Version": version, + "X-Stainless-OS": str(platform or get_platform()), + "X-Stainless-Arch": str(get_architecture()), + "X-Stainless-Runtime": get_python_runtime(), + "X-Stainless-Runtime-Version": get_python_version(), + } + + class OtherArch: def __init__(self, name: str) -> None: self.name = name @@ -1643,9 +1923,26 @@ def __str__(self) -> str: Arch = Union[OtherArch, Literal["x32", "x64", "arm", "arm64", "unknown"]] +def get_python_runtime() -> str: + try: + return platform.python_implementation() + except Exception: + return "unknown" + + +def get_python_version() -> str: + try: + return platform.python_version() + except Exception: + return "unknown" + + def get_architecture() -> Arch: - python_bitness, _ = platform.architecture() - machine = platform.machine().lower() + try: + machine = platform.machine().lower() + except Exception: + return "unknown" + if machine in ("arm64", "aarch64"): return "arm64" @@ -1657,7 +1954,7 @@ def get_architecture() -> Arch: return "x64" # TODO: untested - if python_bitness == "32bit": + if sys.maxsize <= 2**32: return "x32" if machine: @@ -1676,105 +1973,3 @@ def _merge_mappings( """ merged = {**obj1, **obj2} return {key: value for key, value in merged.items() if not isinstance(value, Omit)} - - -class HttpxBinaryResponseContent(BinaryResponseContent): - response: httpx.Response - - def __init__(self, response: httpx.Response) -> None: - self.response = response - - @property - @override - def content(self) -> bytes: - return self.response.content - - @property - @override - def text(self) -> str: - return self.response.text - - @property - @override - def encoding(self) -> Optional[str]: - return self.response.encoding - - @property - @override - def charset_encoding(self) -> Optional[str]: - return self.response.charset_encoding - - @override - def json(self, **kwargs: Any) -> Any: - return self.response.json(**kwargs) - - @override - def read(self) -> bytes: - return self.response.read() - - @override - def iter_bytes(self, chunk_size: Optional[int] = None) -> Iterator[bytes]: - return self.response.iter_bytes(chunk_size) - - 
@override - def iter_text(self, chunk_size: Optional[int] = None) -> Iterator[str]: - return self.response.iter_text(chunk_size) - - @override - def iter_lines(self) -> Iterator[str]: - return self.response.iter_lines() - - @override - def iter_raw(self, chunk_size: Optional[int] = None) -> Iterator[bytes]: - return self.response.iter_raw(chunk_size) - - @override - def stream_to_file( - self, - file: str | os.PathLike[str], - *, - chunk_size: int | None = None, - ) -> None: - with open(file, mode="wb") as f: - for data in self.response.iter_bytes(chunk_size): - f.write(data) - - @override - def close(self) -> None: - return self.response.close() - - @override - async def aread(self) -> bytes: - return await self.response.aread() - - @override - async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]: - return self.response.aiter_bytes(chunk_size) - - @override - async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]: - return self.response.aiter_text(chunk_size) - - @override - async def aiter_lines(self) -> AsyncIterator[str]: - return self.response.aiter_lines() - - @override - async def aiter_raw(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]: - return self.response.aiter_raw(chunk_size) - - @override - async def astream_to_file( - self, - file: str | os.PathLike[str], - *, - chunk_size: int | None = None, - ) -> None: - path = anyio.Path(file) - async with await path.open(mode="wb") as f: - async for data in self.response.aiter_bytes(chunk_size): - await f.write(data) - - @override - async def aclose(self) -> None: - return await self.response.aclose() diff --git a/src/openai/_client.py b/src/openai/_client.py index aa00073281..b251ab0917 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -1,15 +1,14 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os -import asyncio -from typing import Any, Union, Mapping +from typing import TYPE_CHECKING, Any, Union, Mapping from typing_extensions import Self, override import httpx -from . import resources, _exceptions +from . 
import _exceptions from ._qs import Querystring from ._types import ( NOT_GIVEN, @@ -20,56 +19,87 @@ ProxiesTypes, RequestOptions, ) -from ._utils import is_given, is_mapping +from ._utils import ( + is_given, + is_mapping, + get_async_library, +) +from ._compat import cached_property from ._version import __version__ -from ._streaming import Stream as Stream -from ._streaming import AsyncStream as AsyncStream +from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import OpenAIError, APIStatusError -from ._base_client import DEFAULT_MAX_RETRIES, SyncAPIClient, AsyncAPIClient +from ._base_client import ( + DEFAULT_MAX_RETRIES, + SyncAPIClient, + AsyncAPIClient, +) -__all__ = [ - "Timeout", - "Transport", - "ProxiesTypes", - "RequestOptions", - "resources", - "OpenAI", - "AsyncOpenAI", - "Client", - "AsyncClient", -] +if TYPE_CHECKING: + from .resources import ( + beta, + chat, + audio, + evals, + files, + images, + models, + batches, + uploads, + responses, + embeddings, + completions, + fine_tuning, + moderations, + vector_stores, + ) + from .resources.files import Files, AsyncFiles + from .resources.images import Images, AsyncImages + from .resources.models import Models, AsyncModels + from .resources.batches import Batches, AsyncBatches + from .resources.beta.beta import Beta, AsyncBeta + from .resources.chat.chat import Chat, AsyncChat + from .resources.embeddings import Embeddings, AsyncEmbeddings + from .resources.audio.audio import Audio, AsyncAudio + from .resources.completions import Completions, AsyncCompletions + from .resources.evals.evals import Evals, AsyncEvals + from .resources.moderations import Moderations, AsyncModerations + from .resources.uploads.uploads import Uploads, AsyncUploads + from .resources.responses.responses import Responses, AsyncResponses + from .resources.fine_tuning.fine_tuning import FineTuning, AsyncFineTuning + from .resources.vector_stores.vector_stores import VectorStores, AsyncVectorStores + +__all__ = ["Timeout", "Transport", "ProxiesTypes", "RequestOptions", "OpenAI", "AsyncOpenAI", "Client", "AsyncClient"] class OpenAI(SyncAPIClient): - completions: resources.Completions - chat: resources.Chat - edits: resources.Edits - embeddings: resources.Embeddings - files: resources.Files - images: resources.Images - audio: resources.Audio - moderations: resources.Moderations - models: resources.Models - fine_tuning: resources.FineTuning - fine_tunes: resources.FineTunes - beta: resources.Beta - with_raw_response: OpenAIWithRawResponse - # client options api_key: str organization: str | None + project: str | None + + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com/' becomes + 'wss://example.com' + """ def __init__( self, *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + # Configure a custom httpx client. 
+ # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. http_client: httpx.Client | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised @@ -81,11 +111,12 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new synchronous openai client instance. + """Construct a new synchronous OpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -99,6 +130,12 @@ def __init__( organization = os.environ.get("OPENAI_ORG_ID") self.organization = organization + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -117,24 +154,108 @@ def __init__( self._default_stream_cls = Stream - self.completions = resources.Completions(self) - self.chat = resources.Chat(self) - self.edits = resources.Edits(self) - self.embeddings = resources.Embeddings(self) - self.files = resources.Files(self) - self.images = resources.Images(self) - self.audio = resources.Audio(self) - self.moderations = resources.Moderations(self) - self.models = resources.Models(self) - self.fine_tuning = resources.FineTuning(self) - self.fine_tunes = resources.FineTunes(self) - self.beta = resources.Beta(self) - self.with_raw_response = OpenAIWithRawResponse(self) + @cached_property + def completions(self) -> Completions: + from .resources.completions import Completions + + return Completions(self) + + @cached_property + def chat(self) -> Chat: + from .resources.chat import Chat + + return Chat(self) + + @cached_property + def embeddings(self) -> Embeddings: + from .resources.embeddings import Embeddings + + return Embeddings(self) + + @cached_property + def files(self) -> Files: + from .resources.files import Files + + return Files(self) + + @cached_property + def images(self) -> Images: + from .resources.images import Images + + return Images(self) + + @cached_property + def audio(self) -> Audio: + from .resources.audio import Audio + + return Audio(self) + + @cached_property + def moderations(self) -> Moderations: + from .resources.moderations import Moderations + + return Moderations(self) + + @cached_property + def models(self) -> Models: + from .resources.models import Models + + return Models(self) + + @cached_property + def fine_tuning(self) -> FineTuning: + from .resources.fine_tuning import FineTuning + + return FineTuning(self) + + @cached_property + def vector_stores(self) -> VectorStores: + from .resources.vector_stores import VectorStores + + return VectorStores(self) + + @cached_property + def beta(self) -> Beta: + from .resources.beta import Beta + + return Beta(self) + + @cached_property + def batches(self) -> Batches: + from .resources.batches import Batches + + return Batches(self) + + @cached_property + def uploads(self) -> Uploads: + from .resources.uploads import Uploads + + return Uploads(self) + + @cached_property + def 
responses(self) -> Responses: + from .resources.responses import Responses + + return Responses(self) + + @cached_property + def evals(self) -> Evals: + from .resources.evals import Evals + + return Evals(self) + + @cached_property + def with_raw_response(self) -> OpenAIWithRawResponse: + return OpenAIWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> OpenAIWithStreamedResponse: + return OpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -147,7 +268,9 @@ def auth_headers(self) -> dict[str, str]: def default_headers(self) -> dict[str, str | Omit]: return { **super().default_headers, + "X-Stainless-Async": "false", "OpenAI-Organization": self.organization if self.organization is not None else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), **self._custom_headers, } @@ -156,6 +279,8 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, @@ -191,7 +316,9 @@ def copy( return self.__class__( api_key=api_key or self.api_key, organization=organization or self.organization, - base_url=base_url or str(self.base_url), + project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, @@ -204,16 +331,6 @@ def copy( # client.with_options(timeout=10).foo.create(...) with_options = copy - def __del__(self) -> None: - if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"): - # this can happen if the '__init__' method raised an error - return - - if self._has_custom_http_client: - return - - self.close() - @override def _make_status_error( self, @@ -250,35 +367,34 @@ def _make_status_error( class AsyncOpenAI(AsyncAPIClient): - completions: resources.AsyncCompletions - chat: resources.AsyncChat - edits: resources.AsyncEdits - embeddings: resources.AsyncEmbeddings - files: resources.AsyncFiles - images: resources.AsyncImages - audio: resources.AsyncAudio - moderations: resources.AsyncModerations - models: resources.AsyncModels - fine_tuning: resources.AsyncFineTuning - fine_tunes: resources.AsyncFineTunes - beta: resources.AsyncBeta - with_raw_response: AsyncOpenAIWithRawResponse - # client options api_key: str organization: str | None + project: str | None + + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com/' becomes + 'wss://example.com' + """ def __init__( self, *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. 
See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. + # Configure a custom httpx client. + # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. http_client: httpx.AsyncClient | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised @@ -290,11 +406,12 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new async openai client instance. + """Construct a new async AsyncOpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` + - `project` from `OPENAI_PROJECT_ID` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -308,6 +425,12 @@ def __init__( organization = os.environ.get("OPENAI_ORG_ID") self.organization = organization + if project is None: + project = os.environ.get("OPENAI_PROJECT_ID") + self.project = project + + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -326,24 +449,108 @@ def __init__( self._default_stream_cls = AsyncStream - self.completions = resources.AsyncCompletions(self) - self.chat = resources.AsyncChat(self) - self.edits = resources.AsyncEdits(self) - self.embeddings = resources.AsyncEmbeddings(self) - self.files = resources.AsyncFiles(self) - self.images = resources.AsyncImages(self) - self.audio = resources.AsyncAudio(self) - self.moderations = resources.AsyncModerations(self) - self.models = resources.AsyncModels(self) - self.fine_tuning = resources.AsyncFineTuning(self) - self.fine_tunes = resources.AsyncFineTunes(self) - self.beta = resources.AsyncBeta(self) - self.with_raw_response = AsyncOpenAIWithRawResponse(self) + @cached_property + def completions(self) -> AsyncCompletions: + from .resources.completions import AsyncCompletions + + return AsyncCompletions(self) + + @cached_property + def chat(self) -> AsyncChat: + from .resources.chat import AsyncChat + + return AsyncChat(self) + + @cached_property + def embeddings(self) -> AsyncEmbeddings: + from .resources.embeddings import AsyncEmbeddings + + return AsyncEmbeddings(self) + + @cached_property + def files(self) -> AsyncFiles: + from .resources.files import AsyncFiles + + return AsyncFiles(self) + + @cached_property + def images(self) -> AsyncImages: + from .resources.images import AsyncImages + + return AsyncImages(self) + + @cached_property + def audio(self) -> AsyncAudio: + from .resources.audio import AsyncAudio + + return AsyncAudio(self) + + @cached_property + def moderations(self) -> AsyncModerations: + from .resources.moderations import AsyncModerations + + return AsyncModerations(self) + + @cached_property + def models(self) -> AsyncModels: + from .resources.models import AsyncModels + + return AsyncModels(self) + + @cached_property + def fine_tuning(self) -> AsyncFineTuning: + from .resources.fine_tuning import AsyncFineTuning + + return AsyncFineTuning(self) + + @cached_property + def vector_stores(self) -> AsyncVectorStores: + from .resources.vector_stores import AsyncVectorStores + + return AsyncVectorStores(self) + + @cached_property 
+ def beta(self) -> AsyncBeta: + from .resources.beta import AsyncBeta + + return AsyncBeta(self) + + @cached_property + def batches(self) -> AsyncBatches: + from .resources.batches import AsyncBatches + + return AsyncBatches(self) + + @cached_property + def uploads(self) -> AsyncUploads: + from .resources.uploads import AsyncUploads + + return AsyncUploads(self) + + @cached_property + def responses(self) -> AsyncResponses: + from .resources.responses import AsyncResponses + + return AsyncResponses(self) + + @cached_property + def evals(self) -> AsyncEvals: + from .resources.evals import AsyncEvals + + return AsyncEvals(self) + + @cached_property + def with_raw_response(self) -> AsyncOpenAIWithRawResponse: + return AsyncOpenAIWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncOpenAIWithStreamedResponse: + return AsyncOpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -356,7 +563,9 @@ def auth_headers(self) -> dict[str, str]: def default_headers(self) -> dict[str, str | Omit]: return { **super().default_headers, + "X-Stainless-Async": f"async:{get_async_library()}", "OpenAI-Organization": self.organization if self.organization is not None else Omit(), + "OpenAI-Project": self.project if self.project is not None else Omit(), **self._custom_headers, } @@ -365,6 +574,8 @@ def copy( *, api_key: str | None = None, organization: str | None = None, + project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, @@ -400,7 +611,9 @@ def copy( return self.__class__( api_key=api_key or self.api_key, organization=organization or self.organization, - base_url=base_url or str(self.base_url), + project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, @@ -413,19 +626,6 @@ def copy( # client.with_options(timeout=10).foo.create(...) 
with_options = copy - def __del__(self) -> None: - if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"): - # this can happen if the '__init__' method raised an error - return - - if self._has_custom_http_client: - return - - try: - asyncio.get_running_loop().create_task(self.close()) - except Exception: - pass - @override def _make_status_error( self, @@ -462,35 +662,391 @@ def _make_status_error( class OpenAIWithRawResponse: + _client: OpenAI + def __init__(self, client: OpenAI) -> None: - self.completions = resources.CompletionsWithRawResponse(client.completions) - self.chat = resources.ChatWithRawResponse(client.chat) - self.edits = resources.EditsWithRawResponse(client.edits) - self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) - self.files = resources.FilesWithRawResponse(client.files) - self.images = resources.ImagesWithRawResponse(client.images) - self.audio = resources.AudioWithRawResponse(client.audio) - self.moderations = resources.ModerationsWithRawResponse(client.moderations) - self.models = resources.ModelsWithRawResponse(client.models) - self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning) - self.fine_tunes = resources.FineTunesWithRawResponse(client.fine_tunes) - self.beta = resources.BetaWithRawResponse(client.beta) + self._client = client + + @cached_property + def completions(self) -> completions.CompletionsWithRawResponse: + from .resources.completions import CompletionsWithRawResponse + + return CompletionsWithRawResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.ChatWithRawResponse: + from .resources.chat import ChatWithRawResponse + + return ChatWithRawResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.EmbeddingsWithRawResponse: + from .resources.embeddings import EmbeddingsWithRawResponse + + return EmbeddingsWithRawResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.FilesWithRawResponse: + from .resources.files import FilesWithRawResponse + + return FilesWithRawResponse(self._client.files) + + @cached_property + def images(self) -> images.ImagesWithRawResponse: + from .resources.images import ImagesWithRawResponse + + return ImagesWithRawResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AudioWithRawResponse: + from .resources.audio import AudioWithRawResponse + + return AudioWithRawResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.ModerationsWithRawResponse: + from .resources.moderations import ModerationsWithRawResponse + + return ModerationsWithRawResponse(self._client.moderations) + + @cached_property + def models(self) -> models.ModelsWithRawResponse: + from .resources.models import ModelsWithRawResponse + + return ModelsWithRawResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.FineTuningWithRawResponse: + from .resources.fine_tuning import FineTuningWithRawResponse + + return FineTuningWithRawResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.VectorStoresWithRawResponse: + from .resources.vector_stores import VectorStoresWithRawResponse + + return VectorStoresWithRawResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.BetaWithRawResponse: + from .resources.beta import BetaWithRawResponse + + return BetaWithRawResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.BatchesWithRawResponse: + from 
.resources.batches import BatchesWithRawResponse + + return BatchesWithRawResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.UploadsWithRawResponse: + from .resources.uploads import UploadsWithRawResponse + + return UploadsWithRawResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.ResponsesWithRawResponse: + from .resources.responses import ResponsesWithRawResponse + + return ResponsesWithRawResponse(self._client.responses) + + @cached_property + def evals(self) -> evals.EvalsWithRawResponse: + from .resources.evals import EvalsWithRawResponse + + return EvalsWithRawResponse(self._client.evals) class AsyncOpenAIWithRawResponse: + _client: AsyncOpenAI + def __init__(self, client: AsyncOpenAI) -> None: - self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) - self.chat = resources.AsyncChatWithRawResponse(client.chat) - self.edits = resources.AsyncEditsWithRawResponse(client.edits) - self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) - self.files = resources.AsyncFilesWithRawResponse(client.files) - self.images = resources.AsyncImagesWithRawResponse(client.images) - self.audio = resources.AsyncAudioWithRawResponse(client.audio) - self.moderations = resources.AsyncModerationsWithRawResponse(client.moderations) - self.models = resources.AsyncModelsWithRawResponse(client.models) - self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning) - self.fine_tunes = resources.AsyncFineTunesWithRawResponse(client.fine_tunes) - self.beta = resources.AsyncBetaWithRawResponse(client.beta) + self._client = client + + @cached_property + def completions(self) -> completions.AsyncCompletionsWithRawResponse: + from .resources.completions import AsyncCompletionsWithRawResponse + + return AsyncCompletionsWithRawResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.AsyncChatWithRawResponse: + from .resources.chat import AsyncChatWithRawResponse + + return AsyncChatWithRawResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.AsyncEmbeddingsWithRawResponse: + from .resources.embeddings import AsyncEmbeddingsWithRawResponse + + return AsyncEmbeddingsWithRawResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.AsyncFilesWithRawResponse: + from .resources.files import AsyncFilesWithRawResponse + + return AsyncFilesWithRawResponse(self._client.files) + + @cached_property + def images(self) -> images.AsyncImagesWithRawResponse: + from .resources.images import AsyncImagesWithRawResponse + + return AsyncImagesWithRawResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AsyncAudioWithRawResponse: + from .resources.audio import AsyncAudioWithRawResponse + + return AsyncAudioWithRawResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.AsyncModerationsWithRawResponse: + from .resources.moderations import AsyncModerationsWithRawResponse + + return AsyncModerationsWithRawResponse(self._client.moderations) + + @cached_property + def models(self) -> models.AsyncModelsWithRawResponse: + from .resources.models import AsyncModelsWithRawResponse + + return AsyncModelsWithRawResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.AsyncFineTuningWithRawResponse: + from .resources.fine_tuning import AsyncFineTuningWithRawResponse + + return AsyncFineTuningWithRawResponse(self._client.fine_tuning) + + @cached_property + 
def vector_stores(self) -> vector_stores.AsyncVectorStoresWithRawResponse: + from .resources.vector_stores import AsyncVectorStoresWithRawResponse + + return AsyncVectorStoresWithRawResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.AsyncBetaWithRawResponse: + from .resources.beta import AsyncBetaWithRawResponse + + return AsyncBetaWithRawResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.AsyncBatchesWithRawResponse: + from .resources.batches import AsyncBatchesWithRawResponse + + return AsyncBatchesWithRawResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.AsyncUploadsWithRawResponse: + from .resources.uploads import AsyncUploadsWithRawResponse + + return AsyncUploadsWithRawResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.AsyncResponsesWithRawResponse: + from .resources.responses import AsyncResponsesWithRawResponse + + return AsyncResponsesWithRawResponse(self._client.responses) + + @cached_property + def evals(self) -> evals.AsyncEvalsWithRawResponse: + from .resources.evals import AsyncEvalsWithRawResponse + + return AsyncEvalsWithRawResponse(self._client.evals) + + +class OpenAIWithStreamedResponse: + _client: OpenAI + + def __init__(self, client: OpenAI) -> None: + self._client = client + + @cached_property + def completions(self) -> completions.CompletionsWithStreamingResponse: + from .resources.completions import CompletionsWithStreamingResponse + + return CompletionsWithStreamingResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.ChatWithStreamingResponse: + from .resources.chat import ChatWithStreamingResponse + + return ChatWithStreamingResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.EmbeddingsWithStreamingResponse: + from .resources.embeddings import EmbeddingsWithStreamingResponse + + return EmbeddingsWithStreamingResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.FilesWithStreamingResponse: + from .resources.files import FilesWithStreamingResponse + + return FilesWithStreamingResponse(self._client.files) + + @cached_property + def images(self) -> images.ImagesWithStreamingResponse: + from .resources.images import ImagesWithStreamingResponse + + return ImagesWithStreamingResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AudioWithStreamingResponse: + from .resources.audio import AudioWithStreamingResponse + + return AudioWithStreamingResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.ModerationsWithStreamingResponse: + from .resources.moderations import ModerationsWithStreamingResponse + + return ModerationsWithStreamingResponse(self._client.moderations) + + @cached_property + def models(self) -> models.ModelsWithStreamingResponse: + from .resources.models import ModelsWithStreamingResponse + + return ModelsWithStreamingResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.FineTuningWithStreamingResponse: + from .resources.fine_tuning import FineTuningWithStreamingResponse + + return FineTuningWithStreamingResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.VectorStoresWithStreamingResponse: + from .resources.vector_stores import VectorStoresWithStreamingResponse + + return VectorStoresWithStreamingResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.BetaWithStreamingResponse: + 
from .resources.beta import BetaWithStreamingResponse + + return BetaWithStreamingResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.BatchesWithStreamingResponse: + from .resources.batches import BatchesWithStreamingResponse + + return BatchesWithStreamingResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.UploadsWithStreamingResponse: + from .resources.uploads import UploadsWithStreamingResponse + + return UploadsWithStreamingResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.ResponsesWithStreamingResponse: + from .resources.responses import ResponsesWithStreamingResponse + + return ResponsesWithStreamingResponse(self._client.responses) + + @cached_property + def evals(self) -> evals.EvalsWithStreamingResponse: + from .resources.evals import EvalsWithStreamingResponse + + return EvalsWithStreamingResponse(self._client.evals) + + +class AsyncOpenAIWithStreamedResponse: + _client: AsyncOpenAI + + def __init__(self, client: AsyncOpenAI) -> None: + self._client = client + + @cached_property + def completions(self) -> completions.AsyncCompletionsWithStreamingResponse: + from .resources.completions import AsyncCompletionsWithStreamingResponse + + return AsyncCompletionsWithStreamingResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.AsyncChatWithStreamingResponse: + from .resources.chat import AsyncChatWithStreamingResponse + + return AsyncChatWithStreamingResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.AsyncEmbeddingsWithStreamingResponse: + from .resources.embeddings import AsyncEmbeddingsWithStreamingResponse + + return AsyncEmbeddingsWithStreamingResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.AsyncFilesWithStreamingResponse: + from .resources.files import AsyncFilesWithStreamingResponse + + return AsyncFilesWithStreamingResponse(self._client.files) + + @cached_property + def images(self) -> images.AsyncImagesWithStreamingResponse: + from .resources.images import AsyncImagesWithStreamingResponse + + return AsyncImagesWithStreamingResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AsyncAudioWithStreamingResponse: + from .resources.audio import AsyncAudioWithStreamingResponse + + return AsyncAudioWithStreamingResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.AsyncModerationsWithStreamingResponse: + from .resources.moderations import AsyncModerationsWithStreamingResponse + + return AsyncModerationsWithStreamingResponse(self._client.moderations) + + @cached_property + def models(self) -> models.AsyncModelsWithStreamingResponse: + from .resources.models import AsyncModelsWithStreamingResponse + + return AsyncModelsWithStreamingResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.AsyncFineTuningWithStreamingResponse: + from .resources.fine_tuning import AsyncFineTuningWithStreamingResponse + + return AsyncFineTuningWithStreamingResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.AsyncVectorStoresWithStreamingResponse: + from .resources.vector_stores import AsyncVectorStoresWithStreamingResponse + + return AsyncVectorStoresWithStreamingResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.AsyncBetaWithStreamingResponse: + from .resources.beta import AsyncBetaWithStreamingResponse + + return 
AsyncBetaWithStreamingResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.AsyncBatchesWithStreamingResponse: + from .resources.batches import AsyncBatchesWithStreamingResponse + + return AsyncBatchesWithStreamingResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.AsyncUploadsWithStreamingResponse: + from .resources.uploads import AsyncUploadsWithStreamingResponse + + return AsyncUploadsWithStreamingResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.AsyncResponsesWithStreamingResponse: + from .resources.responses import AsyncResponsesWithStreamingResponse + + return AsyncResponsesWithStreamingResponse(self._client.responses) + + @cached_property + def evals(self) -> evals.AsyncEvalsWithStreamingResponse: + from .resources.evals import AsyncEvalsWithStreamingResponse + + return AsyncEvalsWithStreamingResponse(self._client.evals) Client = OpenAI diff --git a/src/openai/_compat.py b/src/openai/_compat.py index 34323c9b7e..87fc370765 100644 --- a/src/openai/_compat.py +++ b/src/openai/_compat.py @@ -1,13 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Union, TypeVar, cast +from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload from datetime import date, datetime +from typing_extensions import Self, Literal import pydantic from pydantic.fields import FieldInfo -from ._types import StrBytesIntFloat +from ._types import IncEx, StrBytesIntFloat +_T = TypeVar("_T") _ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) # --------------- Pydantic v2 compatibility --------------- @@ -43,21 +45,23 @@ def is_typeddict(type_: type[Any]) -> bool: # noqa: ARG001 else: if PYDANTIC_V2: - from pydantic.v1.typing import get_args as get_args - from pydantic.v1.typing import is_union as is_union - from pydantic.v1.typing import get_origin as get_origin - from pydantic.v1.typing import is_typeddict as is_typeddict - from pydantic.v1.typing import is_literal_type as is_literal_type - from pydantic.v1.datetime_parse import parse_date as parse_date - from pydantic.v1.datetime_parse import parse_datetime as parse_datetime + from pydantic.v1.typing import ( + get_args as get_args, + is_union as is_union, + get_origin as get_origin, + is_typeddict as is_typeddict, + is_literal_type as is_literal_type, + ) + from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime else: - from pydantic.typing import get_args as get_args - from pydantic.typing import is_union as is_union - from pydantic.typing import get_origin as get_origin - from pydantic.typing import is_typeddict as is_typeddict - from pydantic.typing import is_literal_type as is_literal_type - from pydantic.datetime_parse import parse_date as parse_date - from pydantic.datetime_parse import parse_datetime as parse_datetime + from pydantic.typing import ( + get_args as get_args, + is_union as is_union, + get_origin as get_origin, + is_typeddict as is_typeddict, + is_literal_type as is_literal_type, + ) + from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # refactored config @@ -114,10 +118,10 @@ def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: return model.__fields__ # type: ignore -def model_copy(model: _ModelT) -> _ModelT: +def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT: if PYDANTIC_V2: - return model.model_copy() - return model.copy() # type: ignore + return 
model.model_copy(deep=deep) + return model.copy(deep=deep) # type: ignore def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: @@ -129,17 +133,25 @@ def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: def model_dump( model: pydantic.BaseModel, *, + exclude: IncEx | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, + warnings: bool = True, + mode: Literal["json", "python"] = "python", ) -> dict[str, Any]: - if PYDANTIC_V2: + if PYDANTIC_V2 or hasattr(model, "model_dump"): return model.model_dump( + mode=mode, + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, + # warnings are not supported in Pydantic v1 + warnings=warnings if PYDANTIC_V2 else True, ) return cast( "dict[str, Any]", model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, ), @@ -152,22 +164,68 @@ def model_parse(model: type[_ModelT], data: Any) -> _ModelT: return model.parse_obj(data) # pyright: ignore[reportDeprecated] +def model_parse_json(model: type[_ModelT], data: str | bytes) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate_json(data) + return model.parse_raw(data) # pyright: ignore[reportDeprecated] + + +def model_json_schema(model: type[_ModelT]) -> dict[str, Any]: + if PYDANTIC_V2: + return model.model_json_schema() + return model.schema() # pyright: ignore[reportDeprecated] + + # generic models if TYPE_CHECKING: - class GenericModel(pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... else: if PYDANTIC_V2: # there no longer needs to be a distinction in v2 but # we still have to create our own subclass to avoid # inconsistent MRO ordering errors - class GenericModel(pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... else: import pydantic.generics - class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): - ... + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ... + + +# cached properties +if TYPE_CHECKING: + cached_property = property + + # we define a separate type (copied from typeshed) + # that represents that `cached_property` is `set`able + # at runtime, which differs from `@property`. + # + # this is a separate type as editors likely special case + # `@property` and we don't want to cause issues just to have + # more helpful internal types. + + class typed_cached_property(Generic[_T]): + func: Callable[[Any], _T] + attrname: str | None + + def __init__(self, func: Callable[[Any], _T]) -> None: ... + + @overload + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ... + + @overload + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ... + + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: + raise NotImplementedError() + + def __set_name__(self, owner: type[Any], name: str) -> None: ... + + # __set__ is not defined at runtime, but @cached_property is designed to be settable + def __set__(self, instance: object, value: _T) -> None: ... +else: + from functools import cached_property as cached_property + + typed_cached_property = cached_property diff --git a/src/openai/_constants.py b/src/openai/_constants.py index 2e402300d3..7029dc72b0 100644 --- a/src/openai/_constants.py +++ b/src/openai/_constants.py @@ -1,10 +1,14 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. import httpx RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" +OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 10 minutes -DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=600, connect=5.0) DEFAULT_MAX_RETRIES = 2 -DEFAULT_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) +DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100) + +INITIAL_RETRY_DELAY = 0.5 +MAX_RETRY_DELAY = 8.0 diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py index b79ac5fd64..e326ed9578 100644 --- a/src/openai/_exceptions.py +++ b/src/openai/_exceptions.py @@ -1,13 +1,17 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Any, Optional, cast +from typing import TYPE_CHECKING, Any, Optional, cast from typing_extensions import Literal import httpx from ._utils import is_dict +from ._models import construct_type + +if TYPE_CHECKING: + from .types.chat import ChatCompletion __all__ = [ "BadRequestError", @@ -18,6 +22,8 @@ "UnprocessableEntityError", "RateLimitError", "InternalServerError", + "LengthFinishReasonError", + "ContentFilterFinishReasonError", ] @@ -40,19 +46,20 @@ class APIError(OpenAIError): If there was no response associated with this error then it will be `None`. """ - code: Optional[str] - param: Optional[str] + code: Optional[str] = None + param: Optional[str] = None type: Optional[str] def __init__(self, message: str, request: httpx.Request, *, body: object | None) -> None: super().__init__(message) self.request = request self.message = message + self.body = body if is_dict(body): - self.code = cast(Any, body.get("code")) - self.param = cast(Any, body.get("param")) - self.type = cast(Any, body.get("type")) + self.code = cast(Any, construct_type(type_=Optional[str], value=body.get("code"))) + self.param = cast(Any, construct_type(type_=Optional[str], value=body.get("param"))) + self.type = cast(Any, construct_type(type_=str, value=body.get("type"))) else: self.code = None self.param = None @@ -74,11 +81,13 @@ class APIStatusError(APIError): response: httpx.Response status_code: int + request_id: str | None def __init__(self, message: str, *, response: httpx.Response, body: object | None) -> None: super().__init__(message, response.request, body=body) self.response = response self.status_code = response.status_code + self.request_id = response.headers.get("x-request-id") class APIConnectionError(APIError): @@ -121,3 +130,27 @@ class RateLimitError(APIStatusError): class InternalServerError(APIStatusError): pass + + +class LengthFinishReasonError(OpenAIError): + completion: ChatCompletion + """The completion that caused this error. + + Note: this will *not* be a complete `ChatCompletion` object when streaming as `usage` + will not be included. 
+ """ + + def __init__(self, *, completion: ChatCompletion) -> None: + msg = "Could not parse response content as the length limit was reached" + if completion.usage: + msg += f" - {completion.usage}" + + super().__init__(msg) + self.completion = completion + + +class ContentFilterFinishReasonError(OpenAIError): + def __init__(self) -> None: + super().__init__( + f"Could not parse response content as the request was rejected by the content filter", + ) diff --git a/src/openai/_extras/__init__.py b/src/openai/_extras/__init__.py index dc6625c5dc..692de248c0 100644 --- a/src/openai/_extras/__init__.py +++ b/src/openai/_extras/__init__.py @@ -1,3 +1,3 @@ -from .numpy_proxy import numpy as numpy -from .numpy_proxy import has_numpy as has_numpy +from .numpy_proxy import numpy as numpy, has_numpy as has_numpy from .pandas_proxy import pandas as pandas +from .sounddevice_proxy import sounddevice as sounddevice diff --git a/src/openai/_extras/numpy_proxy.py b/src/openai/_extras/numpy_proxy.py index 408eaebd3b..2b0669576e 100644 --- a/src/openai/_extras/numpy_proxy.py +++ b/src/openai/_extras/numpy_proxy.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any -from typing_extensions import ClassVar, override +from typing_extensions import override from .._utils import LazyProxy from ._common import MissingDependencyError, format_instructions @@ -10,18 +10,16 @@ import numpy as numpy -NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="datalib") +NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="voice_helpers") class NumpyProxy(LazyProxy[Any]): - should_cache: ClassVar[bool] = True - @override def __load__(self) -> Any: try: import numpy - except ImportError: - raise MissingDependencyError(NUMPY_INSTRUCTIONS) + except ImportError as err: + raise MissingDependencyError(NUMPY_INSTRUCTIONS) from err return numpy diff --git a/src/openai/_extras/pandas_proxy.py b/src/openai/_extras/pandas_proxy.py index 2fc0d2a7eb..686377bade 100644 --- a/src/openai/_extras/pandas_proxy.py +++ b/src/openai/_extras/pandas_proxy.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any -from typing_extensions import ClassVar, override +from typing_extensions import override from .._utils import LazyProxy from ._common import MissingDependencyError, format_instructions @@ -14,14 +14,12 @@ class PandasProxy(LazyProxy[Any]): - should_cache: ClassVar[bool] = True - @override def __load__(self) -> Any: try: import pandas - except ImportError: - raise MissingDependencyError(PANDAS_INSTRUCTIONS) + except ImportError as err: + raise MissingDependencyError(PANDAS_INSTRUCTIONS) from err return pandas diff --git a/src/openai/_extras/sounddevice_proxy.py b/src/openai/_extras/sounddevice_proxy.py new file mode 100644 index 0000000000..482d4c6874 --- /dev/null +++ b/src/openai/_extras/sounddevice_proxy.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from typing_extensions import override + +from .._utils import LazyProxy +from ._common import MissingDependencyError, format_instructions + +if TYPE_CHECKING: + import sounddevice as sounddevice # type: ignore + + +SOUNDDEVICE_INSTRUCTIONS = format_instructions(library="sounddevice", extra="voice_helpers") + + +class SounddeviceProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + try: + import sounddevice # type: ignore + except ImportError as err: + raise MissingDependencyError(SOUNDDEVICE_INSTRUCTIONS) from err + + return 
sounddevice + + +if not TYPE_CHECKING: + sounddevice = SounddeviceProxy() diff --git a/src/openai/_files.py b/src/openai/_files.py index bebfb19501..801a0d2928 100644 --- a/src/openai/_files.py +++ b/src/openai/_files.py @@ -13,12 +13,17 @@ FileContent, RequestFiles, HttpxFileTypes, + Base64FileInput, HttpxFileContent, HttpxRequestFiles, ) from ._utils import is_tuple_t, is_mapping_t, is_sequence_t +def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: + return isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike) + + def is_file_content(obj: object) -> TypeGuard[FileContent]: return ( isinstance(obj, bytes) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike) @@ -34,13 +39,11 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None: @overload -def to_httpx_files(files: None) -> None: - ... +def to_httpx_files(files: None) -> None: ... @overload -def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: @@ -78,13 +81,11 @@ def _read_file_content(file: FileContent) -> HttpxFileContent: @overload -async def async_to_httpx_files(files: None) -> None: - ... +async def async_to_httpx_files(files: None) -> None: ... @overload -async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py new file mode 100644 index 0000000000..cfabaa2fc2 --- /dev/null +++ b/src/openai/_legacy_response.py @@ -0,0 +1,488 @@ +from __future__ import annotations + +import os +import inspect +import logging +import datetime +import functools +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) +from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin + +import anyio +import httpx +import pydantic + +from ._types import NoneType +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type +from ._models import BaseModel, is_basemodel, add_request_id +from ._constants import RAW_RESPONSE_HEADER +from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type +from ._exceptions import APIResponseValidationError + +if TYPE_CHECKING: + from ._models import FinalRequestOptions + from ._base_client import BaseClient + + +P = ParamSpec("P") +R = TypeVar("R") +_T = TypeVar("_T") + +log: logging.Logger = logging.getLogger(__name__) + + +class LegacyAPIResponse(Generic[R]): + """This is a legacy class as it will be replaced by `APIResponse` + and `AsyncAPIResponse` in the `_response.py` file in the next major + release. + + For the sync client this will mostly be the same with the exception + of `content` & `text` will be methods instead of properties. In the + async client, all methods will be async. + + A migration script will be provided & the migration in general should + be smooth. 
+ """ + + _cast_to: type[R] + _client: BaseClient[Any, Any] + _parsed_by_type: dict[type[Any], Any] + _stream: bool + _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None + _options: FinalRequestOptions + + http_response: httpx.Response + + retries_taken: int + """The number of retries made. If no retries happened this will be `0`""" + + def __init__( + self, + *, + raw: httpx.Response, + cast_to: type[R], + client: BaseClient[Any, Any], + stream: bool, + stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + options: FinalRequestOptions, + retries_taken: int = 0, + ) -> None: + self._cast_to = cast_to + self._client = client + self._parsed_by_type = {} + self._stream = stream + self._stream_cls = stream_cls + self._options = options + self.http_response = raw + self.retries_taken = retries_taken + + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... + + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + NOTE: For the async client: this will become a coroutine in the next major version. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + if isinstance(parsed, BaseModel): + add_request_id(parsed, self.request_id) + + self._parsed_by_type[cache_key] = parsed + return cast(R, parsed) + + @property + def headers(self) -> httpx.Headers: + return self.http_response.headers + + @property + def http_request(self) -> httpx.Request: + return self.http_response.request + + @property + def status_code(self) -> int: + return self.http_response.status_code + + @property + def url(http://github.com/self) -> httpx.URL: + return self.http_response.url + + @property + def method(self) -> str: + return self.http_request.method + + @property + def content(self) -> bytes: + """Return the binary response content. + + NOTE: this will be removed in favour of `.read()` in the + next major version. + """ + return self.http_response.content + + @property + def text(self) -> str: + """Return the decoded response content. + + NOTE: this will be turned into a method in the next major version. 
+ """ + return self.http_response.text + + @property + def http_version(self) -> str: + return self.http_response.http_version + + @property + def is_closed(self) -> bool: + return self.http_response.is_closed + + @property + def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" + return self.http_response.elapsed + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + + # unwrap `Annotated[T, ...]` -> `T` + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to + + if self._stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}") + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_to=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_to=cast_to, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if cast_to is NoneType: + return cast(R, None) + + response = self.http_response + if cast_to == str: + return cast(R, response.text) + + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + + if cast_to == bool: + return cast(R, response.text.lower() == "true") + + if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent): + return cast(R, cast_to(response)) # type: ignore + + if origin == LegacyAPIResponse: + raise RuntimeError("Unexpected state - cast_to is `APIResponse`") + + if inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_to != httpx.Response: + raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") + return cast(R, response) + + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): + raise TypeError("Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`") + + if ( + cast_to is not object + and not origin is list + and not origin is dict + and not origin is Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if not content_type.endswith("json"): + if is_basemodel(cast_to): + try: + data = response.json() + except Exception as exc: + log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc) + else: + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", + body=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() + + return self._client._process_response_data( + data=data, + cast_to=cast_to, # type: ignore + response=response, + ) + + @override + def __repr__(self) -> str: + return f"" + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", + ) + + +def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], func(*args, **kwargs)) + + return wrapped + + +def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[LegacyAPIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. 
+ """ + + @functools.wraps(func) + async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], await func(*args, **kwargs)) + + return wrapped + + +class HttpxBinaryResponseContent: + response: httpx.Response + + def __init__(self, response: httpx.Response) -> None: + self.response = response + + @property + def content(self) -> bytes: + return self.response.content + + @property + def text(self) -> str: + return self.response.text + + @property + def encoding(self) -> str | None: + return self.response.encoding + + @property + def charset_encoding(self) -> str | None: + return self.response.charset_encoding + + def json(self, **kwargs: Any) -> Any: + return self.response.json(**kwargs) + + def read(self) -> bytes: + return self.response.read() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_bytes(chunk_size) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + return self.response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + return self.response.iter_lines() + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_raw(chunk_size) + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')` + """ + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(): + f.write(data) + + @deprecated( + "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead" + ) + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(chunk_size): + f.write(data) + + def close(self) -> None: + return self.response.close() + + async def aread(self) -> bytes: + return await self.response.aread() + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_bytes(chunk_size) + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + return self.response.aiter_text(chunk_size) + + async def aiter_lines(self) -> AsyncIterator[str]: + return self.response.aiter_lines() + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_raw(chunk_size) + + @deprecated( + "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead" + ) + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.response.aiter_bytes(chunk_size): + await f.write(data) + + async def aclose(self) -> None: + return await self.response.aclose() diff --git a/src/openai/_models.py b/src/openai/_models.py index 5b8c96010f..e2fce49250 100644 --- 
a/src/openai/_models.py +++ b/src/openai/_models.py @@ -1,7 +1,8 @@ from __future__ import annotations +import os import inspect -from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast +from typing import TYPE_CHECKING, Any, Type, Tuple, Union, Generic, TypeVar, Callable, Optional, cast from datetime import date, datetime from typing_extensions import ( Unpack, @@ -9,14 +10,16 @@ ClassVar, Protocol, Required, + Sequence, + ParamSpec, TypedDict, + TypeGuard, final, override, runtime_checkable, ) import pydantic -import pydantic.generics from pydantic.fields import FieldInfo from ._types import ( @@ -31,16 +34,25 @@ HttpxRequestFiles, ) from ._utils import ( + PropertyInfo, is_list, is_given, + json_safe, + lru_cache, is_mapping, parse_date, + coerce_boolean, parse_datetime, strip_not_given, + extract_type_arg, + is_annotated_type, + is_type_alias_type, + strip_annotated_type, ) -from ._compat import PYDANTIC_V2, ConfigDict -from ._compat import GenericModel as BaseGenericModel from ._compat import ( + PYDANTIC_V2, + ConfigDict, + GenericModel as BaseGenericModel, get_args, is_union, parse_obj, @@ -52,9 +64,17 @@ ) from ._constants import RAW_RESPONSE_HEADER +if TYPE_CHECKING: + from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema + __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +P = ParamSpec("P") + +ReprArgs = Sequence[Tuple[Optional[str], Any]] @runtime_checkable @@ -64,7 +84,9 @@ class _ConfigProtocol(Protocol): class BaseModel(pydantic.BaseModel): if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow") + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) + ) else: @property @@ -76,24 +98,119 @@ def model_fields_set(self) -> set[str]: class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] extra: Any = pydantic.Extra.allow # type: ignore + @override + def __repr_args__(self) -> ReprArgs: + # we don't want these attributes to be included when something like `rich.print` is used + return [arg for arg in super().__repr_args__() if arg[0] not in {"_request_id", "__exclude_fields__"}] + + if TYPE_CHECKING: + _request_id: Optional[str] = None + """The ID of the request, returned via the X-Request-ID header. Useful for debugging requests and reporting issues to OpenAI. + + This will **only** be set for the top-level response object, it will not be defined for nested objects. For example: + + ```py + completion = await client.chat.completions.create(...) + completion._request_id # req_id_xxx + completion.usage._request_id # raises `AttributeError` + ``` + + Note: unlike other properties that use an `_` prefix, this property + *is* public. Unless documented otherwise, all other `_` prefix properties, + methods and modules are *private*. + """ + + def to_dict( + self, + *, + mode: Literal["json", "python"] = "python", + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> dict[str, object]: + """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. 
+ + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + mode: + If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`. + If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)` + + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2. + """ + return self.model_dump( + mode=mode, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + def to_json( + self, + *, + indent: int | None = 2, + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> str: + """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation). + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2` + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2. + """ + return self.model_dump_json( + indent=indent, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. 
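    # Editor's illustrative sketch (not part of this patch): `construct` builds model
    # instances from already-parsed data without running validation, recursing into
    # nested models. With hypothetical models:
    #
    #   class Child(BaseModel):
    #       name: str
    #
    #   class Parent(BaseModel):
    #       child: Child
    #
    #   parent = Parent.construct(child={"name": "x"})
    #   # -> Parent(child=Child(name="x")), with no validation errors raised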
@classmethod @override - def construct( - cls: Type[ModelT], + def construct( # pyright: ignore[reportIncompatibleMethodOverride] + __cls: Type[ModelT], _fields_set: set[str] | None = None, **values: object, ) -> ModelT: - m = cls.__new__(cls) + m = __cls.__new__(__cls) fields_values: dict[str, object] = {} - config = get_model_config(cls) + config = get_model_config(__cls) populate_by_name = ( config.allow_population_by_field_name if isinstance(config, _ConfigProtocol) @@ -103,7 +220,7 @@ def construct( if _fields_set is None: _fields_set = set() - model_fields = get_model_fields(cls) + model_fields = get_model_fields(__cls) for name, field in model_fields.items(): key = field.alias if key is None or (key not in values and populate_by_name): @@ -157,14 +274,16 @@ def model_dump( self, *, mode: Literal["json", "python"] | str = "python", - include: IncEx = None, - exclude: IncEx = None, + include: IncEx | None = None, + exclude: IncEx | None = None, by_alias: bool = False, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, round_trip: bool = False, - warnings: bool = True, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, ) -> dict[str, Any]: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump @@ -186,13 +305,17 @@ def model_dump( Returns: A dictionary representation of the model. """ - if mode != "python": - raise ValueError("mode is only supported in Pydantic v2") + if mode not in {"json", "python"}: + raise ValueError("mode must be either 'json' or 'python'") if round_trip != False: raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: raise ValueError("warnings is only supported in Pydantic v2") - return super().dict( # pyright: ignore[reportDeprecated] + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") + dumped = super().dict( # pyright: ignore[reportDeprecated] include=include, exclude=exclude, by_alias=by_alias, @@ -201,19 +324,23 @@ def model_dump( exclude_none=exclude_none, ) + return cast(dict[str, Any], json_safe(dumped)) if mode == "json" else dumped + @override def model_dump_json( self, *, indent: int | None = None, - include: IncEx = None, - exclude: IncEx = None, + include: IncEx | None = None, + exclude: IncEx | None = None, by_alias: bool = False, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, round_trip: bool = False, - warnings: bool = True, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, ) -> str: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json @@ -237,6 +364,10 @@ def model_dump_json( raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") return super().json( # type: ignore[reportDeprecated] indent=indent, include=include, @@ -265,7 +396,6 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: def is_basemodel(type_: type) -> bool: """Returns whether or not the given type is 
either a `BaseModel` or a union of `BaseModel`""" - origin = get_origin(type_) or type_ if is_union(type_): for variant in get_args(type_): if is_basemodel(variant): @@ -273,15 +403,72 @@ def is_basemodel(type_: type) -> bool: return False + return is_basemodel_type(type_) + + +def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: + origin = get_origin(type_) or type_ + if not inspect.isclass(origin): + return False return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) -def construct_type(*, value: object, type_: type) -> object: +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. + + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + +def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: + """Loose coercion to the expected type with construction of nested values. + + Note: the returned value from this function is not guaranteed to match the + given type. + """ + return cast(_T, construct_type(value=value, type_=type_)) + + +def construct_type(*, value: object, type_: object) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. """ + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + + # we allow `object` as the input type because otherwise, passing things like + # `Literal['value']` will be reported as a type error by type checkers + type_ = cast("type[object]", type_) + if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] + type_ = type_.__value__ # type: ignore[unreachable] + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + meta: tuple[Any, ...] = get_args(type_)[1:] + type_ = extract_type_arg(type_, 0) + else: + meta = tuple() + # we need to use the origin class for any types that are subscripted generics # e.g. Dict[str, object] origin = get_origin(type_) or type_ @@ -289,10 +476,32 @@ def construct_type(*, value: object, type_: type) -> object: if is_union(origin): try: - return validate_type(type_=type_, value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass + # if the type is a discriminated union then we want to construct the right variant + # in the union, even if the data doesn't match exactly, otherwise we'd break code + # that relies on the constructed class types, e.g. + # + # class FooType: + # kind: Literal['foo'] + # value: str + # + # class BarType: + # kind: Literal['bar'] + # value: int + # + # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then + # we'd end up constructing `FooType` when it should be `BarType`. 
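    # Editor's note (illustrative, not part of this patch): the discriminator metadata
    # consumed below is typically attached via `Annotated`, e.g. something along the
    # lines of
    #
    #   Item = Annotated[Union[FooType, BarType], PropertyInfo(discriminator="kind")]
    #
    # so that `construct_type(type_=Item, value={"kind": "bar", "value": 1})` resolves
    # to `BarType` instead of the first union variant.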
+ discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta) + if discriminator and is_mapping(value): + variant_value = value.get(discriminator.field_alias_from or discriminator.field_name) + if variant_value and isinstance(variant_value, str): + variant_type = discriminator.mapping.get(variant_value) + if variant_type: + return construct_type(type_=variant_type, value=value) + # if the data is not valid, use the first variant that doesn't fail while deserializing for variant in args: try: @@ -309,7 +518,11 @@ def construct_type(*, value: object, type_: type) -> object: _, items_type = get_args(type_) # Dict[_, items_type] return {key: construct_type(value=item, type_=items_type) for key, item in value.items()} - if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)): + if ( + not is_literal_type(type_) + and inspect.isclass(origin) + and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)) + ): if is_list(value): return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value] @@ -350,6 +563,132 @@ def construct_type(*, value: object, type_: type) -> object: return value +@runtime_checkable +class CachedDiscriminatorType(Protocol): + __discriminator__: DiscriminatorDetails + + +class DiscriminatorDetails: + field_name: str + """The name of the discriminator field in the variant class, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] + ``` + + Will result in field_name='type' + """ + + field_alias_from: str | None + """The name of the discriminator field in the API response, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] = Field(alias='type_from_api') + ``` + + Will result in field_alias_from='type_from_api' + """ + + mapping: dict[str, type] + """Mapping of discriminator value to variant type, e.g. 
+ + {'foo': FooVariant, 'bar': BarVariant} + """ + + def __init__( + self, + *, + mapping: dict[str, type], + discriminator_field: str, + discriminator_alias: str | None, + ) -> None: + self.mapping = mapping + self.field_name = discriminator_field + self.field_alias_from = discriminator_alias + + +def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None: + if isinstance(union, CachedDiscriminatorType): + return union.__discriminator__ + + discriminator_field_name: str | None = None + + for annotation in meta_annotations: + if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None: + discriminator_field_name = annotation.discriminator + break + + if not discriminator_field_name: + return None + + mapping: dict[str, type] = {} + discriminator_alias: str | None = None + + for variant in get_args(union): + variant = strip_annotated_type(variant) + if is_basemodel_type(variant): + if PYDANTIC_V2: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] + + if field_schema["type"] == "literal": + for entry in cast("LiteralSchema", field_schema)["expected"]: + if isinstance(entry, str): + mapping[entry] = variant + else: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field_info.alias + + if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation): + for entry in get_args(annotation): + if isinstance(entry, str): + mapping[entry] = variant + + if not mapping: + return None + + details = DiscriminatorDetails( + mapping=mapping, + discriminator_field=discriminator_field_name, + discriminator_alias=discriminator_alias, + ) + cast(CachedDiscriminatorType, union).__discriminator__ = details + return details + + +def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: + schema = model.__pydantic_core_schema__ + if schema["type"] == "definitions": + schema = schema["schema"] + + if schema["type"] != "model": + return None + + schema = cast("ModelSchema", schema) + fields_schema = schema["schema"] + if fields_schema["type"] != "model-fields": + return None + + fields_schema = cast("ModelFieldsSchema", fields_schema) + field = fields_schema["fields"].get(field_name) + if not field: + return None + + return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast] + + def validate_type(*, type_: type[_T], value: object) -> _T: """Strict validation that the given value matches the expected type""" if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): @@ -358,7 +697,30 @@ def validate_type(*, type_: type[_T], value: object) -> _T: return cast(_T, _validate_non_model_type(type_=type_, value=value)) -# our use of subclasssing here causes weirdness for type checkers, +def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: + """Add a pydantic config for the given type. + + Note: this is a no-op on Pydantic v1. 
+ """ + setattr(typ, "__pydantic_config__", config) # noqa: B010 + + +def add_request_id(obj: BaseModel, request_id: str | None) -> None: + obj._request_id = request_id + + # in Pydantic v1, using setattr like we do above causes the attribute + # to be included when serializing the model which we don't want in this + # case so we need to explicitly exclude it + if not PYDANTIC_V2: + try: + exclude_fields = obj.__exclude_fields__ # type: ignore + except AttributeError: + cast(Any, obj).__exclude_fields__ = {"_request_id", "__exclude_fields__"} + else: + cast(Any, obj).__exclude_fields__ = {*(exclude_fields or {}), "_request_id", "__exclude_fields__"} + + +# our use of subclassing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: GenericModel = BaseModel @@ -369,7 +731,14 @@ class GenericModel(BaseGenericModel, BaseModel): if PYDANTIC_V2: - from pydantic import TypeAdapter + from pydantic import TypeAdapter as _TypeAdapter + + _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter)) + + if TYPE_CHECKING: + from pydantic import TypeAdapter + else: + TypeAdapter = _CachedTypeAdapter def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: return TypeAdapter(type_).validate_python(value) @@ -382,7 +751,7 @@ class RootModel(GenericModel, Generic[_T]): For example: ```py - validated = RootModel[int](__root__='5').__root__ + validated = RootModel[int](__root__="5").__root__ # validated: 5 ``` """ diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py index fe8e0a2139..dd601f9be9 100644 --- a/src/openai/_module_client.py +++ b/src/openai/_module_client.py @@ -1,92 +1,133 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from __future__ import annotations + +from typing import TYPE_CHECKING from typing_extensions import override -from . import resources, _load_client +if TYPE_CHECKING: + from .resources.files import Files + from .resources.images import Images + from .resources.models import Models + from .resources.batches import Batches + from .resources.beta.beta import Beta + from .resources.chat.chat import Chat + from .resources.embeddings import Embeddings + from .resources.audio.audio import Audio + from .resources.completions import Completions + from .resources.evals.evals import Evals + from .resources.moderations import Moderations + from .resources.uploads.uploads import Uploads + from .resources.responses.responses import Responses + from .resources.fine_tuning.fine_tuning import FineTuning + from .resources.vector_stores.vector_stores import VectorStores + +from . 
import _load_client from ._utils import LazyProxy -class ChatProxy(LazyProxy[resources.Chat]): +class ChatProxy(LazyProxy["Chat"]): @override - def __load__(self) -> resources.Chat: + def __load__(self) -> Chat: return _load_client().chat -class BetaProxy(LazyProxy[resources.Beta]): +class BetaProxy(LazyProxy["Beta"]): @override - def __load__(self) -> resources.Beta: + def __load__(self) -> Beta: return _load_client().beta -class EditsProxy(LazyProxy[resources.Edits]): +class FilesProxy(LazyProxy["Files"]): @override - def __load__(self) -> resources.Edits: - return _load_client().edits + def __load__(self) -> Files: + return _load_client().files -class FilesProxy(LazyProxy[resources.Files]): +class AudioProxy(LazyProxy["Audio"]): @override - def __load__(self) -> resources.Files: - return _load_client().files + def __load__(self) -> Audio: + return _load_client().audio -class AudioProxy(LazyProxy[resources.Audio]): +class EvalsProxy(LazyProxy["Evals"]): @override - def __load__(self) -> resources.Audio: - return _load_client().audio + def __load__(self) -> Evals: + return _load_client().evals -class ImagesProxy(LazyProxy[resources.Images]): +class ImagesProxy(LazyProxy["Images"]): @override - def __load__(self) -> resources.Images: + def __load__(self) -> Images: return _load_client().images -class ModelsProxy(LazyProxy[resources.Models]): +class ModelsProxy(LazyProxy["Models"]): @override - def __load__(self) -> resources.Models: + def __load__(self) -> Models: return _load_client().models -class EmbeddingsProxy(LazyProxy[resources.Embeddings]): +class BatchesProxy(LazyProxy["Batches"]): @override - def __load__(self) -> resources.Embeddings: - return _load_client().embeddings + def __load__(self) -> Batches: + return _load_client().batches + + +class UploadsProxy(LazyProxy["Uploads"]): + @override + def __load__(self) -> Uploads: + return _load_client().uploads -class FineTunesProxy(LazyProxy[resources.FineTunes]): +class ResponsesProxy(LazyProxy["Responses"]): @override - def __load__(self) -> resources.FineTunes: - return _load_client().fine_tunes + def __load__(self) -> Responses: + return _load_client().responses -class CompletionsProxy(LazyProxy[resources.Completions]): +class EmbeddingsProxy(LazyProxy["Embeddings"]): @override - def __load__(self) -> resources.Completions: + def __load__(self) -> Embeddings: + return _load_client().embeddings + + +class CompletionsProxy(LazyProxy["Completions"]): + @override + def __load__(self) -> Completions: return _load_client().completions -class ModerationsProxy(LazyProxy[resources.Moderations]): +class ModerationsProxy(LazyProxy["Moderations"]): @override - def __load__(self) -> resources.Moderations: + def __load__(self) -> Moderations: return _load_client().moderations -class FineTuningProxy(LazyProxy[resources.FineTuning]): +class FineTuningProxy(LazyProxy["FineTuning"]): @override - def __load__(self) -> resources.FineTuning: + def __load__(self) -> FineTuning: return _load_client().fine_tuning -chat: resources.Chat = ChatProxy().__as_proxied__() -beta: resources.Beta = BetaProxy().__as_proxied__() -edits: resources.Edits = EditsProxy().__as_proxied__() -files: resources.Files = FilesProxy().__as_proxied__() -audio: resources.Audio = AudioProxy().__as_proxied__() -images: resources.Images = ImagesProxy().__as_proxied__() -models: resources.Models = ModelsProxy().__as_proxied__() -embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__() -fine_tunes: resources.FineTunes = FineTunesProxy().__as_proxied__() -completions: 
resources.Completions = CompletionsProxy().__as_proxied__() -moderations: resources.Moderations = ModerationsProxy().__as_proxied__() -fine_tuning: resources.FineTuning = FineTuningProxy().__as_proxied__() +class VectorStoresProxy(LazyProxy["VectorStores"]): + @override + def __load__(self) -> VectorStores: + return _load_client().vector_stores + + +chat: Chat = ChatProxy().__as_proxied__() +beta: Beta = BetaProxy().__as_proxied__() +files: Files = FilesProxy().__as_proxied__() +audio: Audio = AudioProxy().__as_proxied__() +evals: Evals = EvalsProxy().__as_proxied__() +images: Images = ImagesProxy().__as_proxied__() +models: Models = ModelsProxy().__as_proxied__() +batches: Batches = BatchesProxy().__as_proxied__() +uploads: Uploads = UploadsProxy().__as_proxied__() +responses: Responses = ResponsesProxy().__as_proxied__() +embeddings: Embeddings = EmbeddingsProxy().__as_proxied__() +completions: Completions = CompletionsProxy().__as_proxied__() +moderations: Moderations = ModerationsProxy().__as_proxied__() +fine_tuning: FineTuning = FineTuningProxy().__as_proxied__() +vector_stores: VectorStores = VectorStoresProxy().__as_proxied__() diff --git a/src/openai/_resource.py b/src/openai/_resource.py index db1b0fa45a..fff9ba19c3 100644 --- a/src/openai/_resource.py +++ b/src/openai/_resource.py @@ -1,11 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import time -import asyncio from typing import TYPE_CHECKING +import anyio + if TYPE_CHECKING: from ._client import OpenAI, AsyncOpenAI @@ -39,4 +40,4 @@ def __init__(self, client: AsyncOpenAI) -> None: self._get_api_list = client.get_api_list async def _sleep(self, seconds: float) -> None: - await asyncio.sleep(seconds) + await anyio.sleep(seconds) diff --git a/src/openai/_response.py b/src/openai/_response.py index 933c37525e..350da38dd4 100644 --- a/src/openai/_response.py +++ b/src/openai/_response.py @@ -1,41 +1,63 @@ from __future__ import annotations +import os import inspect import logging import datetime import functools -from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast -from typing_extensions import Awaitable, ParamSpec, get_args, override, get_origin - +from types import TracebackType +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) +from typing_extensions import Awaitable, ParamSpec, override, get_origin + +import anyio import httpx +import pydantic -from ._types import NoneType, UnknownResponse, BinaryResponseContent -from ._utils import is_given -from ._models import BaseModel, is_basemodel -from ._constants import RAW_RESPONSE_HEADER -from ._exceptions import APIResponseValidationError +from ._types import NoneType +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base +from ._models import BaseModel, is_basemodel, add_request_id +from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER +from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type +from ._exceptions import OpenAIError, APIResponseValidationError if TYPE_CHECKING: from ._models import FinalRequestOptions - from ._base_client import Stream, BaseClient, AsyncStream + from ._base_client import BaseClient P = ParamSpec("P") R = TypeVar("R") +_T = TypeVar("_T") +_APIResponseT = TypeVar("_APIResponseT", 
bound="APIResponse[Any]") +_AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]") log: logging.Logger = logging.getLogger(__name__) -class APIResponse(Generic[R]): +class BaseAPIResponse(Generic[R]): _cast_to: type[R] _client: BaseClient[Any, Any] - _parsed: R | None - _stream: bool + _parsed_by_type: dict[type[Any], Any] + _is_sse_stream: bool _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None _options: FinalRequestOptions http_response: httpx.Response + retries_taken: int + """The number of retries made. If no retries happened this will be `0`""" + def __init__( self, *, @@ -45,25 +67,16 @@ def __init__( stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, options: FinalRequestOptions, + retries_taken: int = 0, ) -> None: self._cast_to = cast_to self._client = client - self._parsed = None - self._stream = stream + self._parsed_by_type = {} + self._is_sse_stream = stream self._stream_cls = stream_cls self._options = options self.http_response = raw - - def parse(self) -> R: - if self._parsed is not None: - return self._parsed - - parsed = self._parse() - if is_given(self._options.post_parser): - parsed = self._options.post_parser(parsed) - - self._parsed = parsed - return parsed + self.retries_taken = retries_taken @property def headers(self) -> httpx.Headers: @@ -71,6 +84,7 @@ def headers(self) -> httpx.Headers: @property def http_request(self) -> httpx.Request: + """Returns the httpx Request instance associated with the current response.""" return self.http_response.request @property @@ -79,20 +93,13 @@ def status_code(self) -> int: @property def url(http://github.com/self) -> httpx.URL: + """Returns the URL for which the request was made.""" return self.http_response.url @property def method(self) -> str: return self.http_request.method - @property - def content(self) -> bytes: - return self.http_response.content - - @property - def text(self) -> str: - return self.http_response.text - @property def http_version(self) -> str: return self.http_response.http_version @@ -102,13 +109,57 @@ def elapsed(self) -> datetime.timedelta: """The time taken for the complete request/response cycle to complete.""" return self.http_response.elapsed - def _parse(self) -> R: - if self._stream: + @property + def is_closed(self) -> bool: + """Whether or not the response body has been closed. + + If this is False then there is response data that has not been read yet. + You must either fully consume the response body or call `.close()` + before discarding the response to prevent resource leaks. + """ + return self.http_response.is_closed + + @override + def __repr__(self) -> str: + return ( + f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>" + ) + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + + # unwrap `Annotated[T, ...]` -> `T` + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to + + if self._is_sse_stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}") + + return cast( + _T, + to( + cast_to=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. 
Stream[ChunkType]", + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + if self._stream_cls: return cast( R, self._stream_cls( - cast_to=_extract_stream_chunk_type(self._stream_cls), + cast_to=extract_stream_chunk_type(self._stream_cls), response=self.http_response, client=cast(Any, self._client), ), @@ -121,13 +172,12 @@ def _parse(self) -> R: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - cast_to = self._cast_to if cast_to is NoneType: return cast(R, None) @@ -135,9 +185,20 @@ def _parse(self) -> R: if cast_to == str: return cast(R, response.text) - origin = get_origin(cast_to) or cast_to + if cast_to == bytes: + return cast(R, response.content) - if inspect.isclass(origin) and issubclass(origin, BinaryResponseContent): + if cast_to == int: + return cast(R, int(response.text)) + + if cast_to == float: + return cast(R, float(response.text)) + + if cast_to == bool: + return cast(R, response.text.lower() == "true") + + # handle the legacy binary response case + if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent": return cast(R, cast_to(response)) # type: ignore if origin == APIResponse: @@ -153,29 +214,30 @@ def _parse(self) -> R: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - # The check here is necessary as we are subverting the the type system - # with casts as the relationship between TypeVars and Types are very strict - # which means we must return *exactly* what was input or transform it in a - # way that retains the TypeVar state. As we cannot do that in this function - # then we have to resort to using `cast`. At the time of writing, we know this - # to be safe as we have handled all the types that could be bound to the - # `ResponseT` TypeVar, however if that TypeVar is ever updated in the future, then - # this function would become unsafe but a type checker would not report an error. if ( - cast_to is not UnknownResponse + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): + raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") + + if ( + cast_to is not object and not origin is list and not origin is dict and not origin is Union and not issubclass(origin, BaseModel) ): raise RuntimeError( - f"Invalid state, expected {cast_to} to be a subclass type of {BaseModel}, {dict}, {list} or {Union}." + f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." ) # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 - content_type, *_ = response.headers.get("content-type").split(";") - if content_type != "application/json": + content_type, *_ = response.headers.get("content-type", "*").split(";") + if not content_type.endswith("json"): if is_basemodel(cast_to): try: data = response.json() @@ -208,9 +270,305 @@ def _parse(self) -> R: response=response, ) - @override - def __repr__(self) -> str: - return f"" + +class APIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... 
+ + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + if isinstance(parsed, BaseModel): + add_request_id(parsed, self.request_id) + + self._parsed_by_type[cache_key] = parsed + return cast(R, parsed) + + def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return self.http_response.read() + except httpx.StreamConsumed as exc: + # The default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message. + raise StreamAlreadyConsumed() from exc + + def text(self) -> str: + """Read and decode the response content into a string.""" + self.read() + return self.http_response.text + + def json(self) -> object: + """Read and decode the JSON response content.""" + self.read() + return self.http_response.json() + + def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self.http_response.close() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. + """ + for chunk in self.http_response.iter_bytes(chunk_size): + yield chunk + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. + """ + for chunk in self.http_response.iter_text(chunk_size): + yield chunk + + def iter_lines(self) -> Iterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + for chunk in self.http_response.iter_lines(): + yield chunk + + +class AsyncAPIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + async def parse(self, *, to: type[_T]) -> _T: ... + + @overload + async def parse(self) -> R: ... + + async def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. 
+ + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_to + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + await self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + if isinstance(parsed, BaseModel): + add_request_id(parsed, self.request_id) + + self._parsed_by_type[cache_key] = parsed + return cast(R, parsed) + + async def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return await self.http_response.aread() + except httpx.StreamConsumed as exc: + # the default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message + raise StreamAlreadyConsumed() from exc + + async def text(self) -> str: + """Read and decode the response content into a string.""" + await self.read() + return self.http_response.text + + async def json(self) -> object: + """Read and decode the JSON response content.""" + await self.read() + return self.http_response.json() + + async def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + await self.http_response.aclose() + + async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. + """ + async for chunk in self.http_response.aiter_bytes(chunk_size): + yield chunk + + async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. + """ + async for chunk in self.http_response.aiter_text(chunk_size): + yield chunk + + async def iter_lines(self) -> AsyncIterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + async for chunk in self.http_response.aiter_lines(): + yield chunk + + +class BinaryAPIResponse(APIResponse[bytes]): + """Subclass of APIResponse providing helpers for dealing with binary data. + + Note: If you want to stream the response data instead of eagerly reading it + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + with open(file, mode="wb") as f: + for data in self.iter_bytes(): + f.write(data) + + +class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]): + """Subclass of APIResponse providing helpers for dealing with binary data. + + Note: If you want to stream the response data instead of eagerly reading it + all at once then you should use `.with_streaming_response` when making + the API request, e.g. 
`.with_streaming_response.get_binary_response()` + """ + + async def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `.with_streaming_response.get_binary_response()` + """ + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.iter_bytes(): + await f.write(data) + + +class StreamedBinaryAPIResponse(APIResponse[bytes]): + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + """Streams the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + """ + with open(file, mode="wb") as f: + for data in self.iter_bytes(chunk_size): + f.write(data) + + +class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]): + async def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + """Streams the output to the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + """ + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.iter_bytes(chunk_size): + await f.write(data) class MissingStreamClassError(TypeError): @@ -220,13 +578,176 @@ def __init__(self) -> None: ) -def _extract_stream_chunk_type(stream_cls: type) -> type: - args = get_args(stream_cls) - if not args: - raise TypeError( - f"Expected stream_cls to have been given a generic type argument, e.g. Stream[Foo] but received {stream_cls}", +class StreamAlreadyConsumed(OpenAIError): + """ + Attempted to read or stream content, but the content has already + been streamed. + + This can happen if you use a method like `.iter_lines()` and then attempt + to read th entire response body afterwards, e.g. + + ```py + response = await client.post(...) + async for line in response.iter_lines(): + ... # do something with `line` + + content = await response.read() + # ^ error + ``` + + If you want this behaviour you'll need to either manually accumulate the response + content or call `await response.read()` before iterating over the stream. + """ + + def __init__(self) -> None: + message = ( + "Attempted to read or stream some content, but the content has " + "already been streamed. " + "This could be due to attempting to stream the response " + "content more than once." + "\n\n" + "You can fix this by manually accumulating the response content while streaming " + "or by calling `.read()` before starting to stream." 
) - return cast(type, args[0]) + super().__init__(message) + + +class ResponseContextManager(Generic[_APIResponseT]): + """Context manager for ensuring that a request is not made + until it is entered and that the response will always be closed + when the context manager exits + """ + + def __init__(self, request_func: Callable[[], _APIResponseT]) -> None: + self._request_func = request_func + self.__response: _APIResponseT | None = None + + def __enter__(self) -> _APIResponseT: + self.__response = self._request_func() + return self.__response + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__response is not None: + self.__response.close() + + +class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]): + """Context manager for ensuring that a request is not made + until it is entered and that the response will always be closed + when the context manager exits + """ + + def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None: + self._api_request = api_request + self.__response: _AsyncAPIResponseT | None = None + + async def __aenter__(self) -> _AsyncAPIResponseT: + self.__response = await self._api_request + return self.__response + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__response is not None: + await self.__response.close() + + +def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseContextManager[APIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support streaming and returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "stream" + + kwargs["extra_headers"] = extra_headers + + make_request = functools.partial(func, *args, **kwargs) + + return ResponseContextManager(cast(Callable[[], APIResponse[R]], make_request)) + + return wrapped + + +def async_to_streamed_response_wrapper( + func: Callable[P, Awaitable[R]], +) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]: + """Higher order function that takes one of our bound API methods and wraps it + to support streaming and returning the raw `APIResponse` object directly. + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "stream" + + kwargs["extra_headers"] = extra_headers + + make_request = func(*args, **kwargs) + + return AsyncResponseContextManager(cast(Awaitable[AsyncAPIResponse[R]], make_request)) + + return wrapped + + +def to_custom_streamed_response_wrapper( + func: Callable[P, object], + response_cls: type[_APIResponseT], +) -> Callable[P, ResponseContextManager[_APIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support streaming and returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. 
`class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]: + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "stream" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + make_request = functools.partial(func, *args, **kwargs) + + return ResponseContextManager(cast(Callable[[], _APIResponseT], make_request)) + + return wrapped + + +def async_to_custom_streamed_response_wrapper( + func: Callable[P, Awaitable[object]], + response_cls: type[_AsyncAPIResponseT], +) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support streaming and returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]: + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "stream" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + make_request = func(*args, **kwargs) + + return AsyncResponseContextManager(cast(Awaitable[_AsyncAPIResponseT], make_request)) + + return wrapped def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]: @@ -236,8 +757,8 @@ def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]] @functools.wraps(func) def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} - extra_headers[RAW_RESPONSE_HEADER] = "true" + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "raw" kwargs["extra_headers"] = extra_headers @@ -246,18 +767,82 @@ def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]: return wrapped -def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[APIResponse[R]]]: +def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]: """Higher order function that takes one of our bound API methods and wraps it to support returning the raw `APIResponse` object directly. """ @functools.wraps(func) - async def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]: - extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})} - extra_headers[RAW_RESPONSE_HEADER] = "true" + async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "raw" + + kwargs["extra_headers"] = extra_headers + + return cast(AsyncAPIResponse[R], await func(*args, **kwargs)) + + return wrapped + + +def to_custom_raw_response_wrapper( + func: Callable[P, object], + response_cls: type[_APIResponseT], +) -> Callable[P, _APIResponseT]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. 
`class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT: + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "raw" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls + + kwargs["extra_headers"] = extra_headers + + return cast(_APIResponseT, func(*args, **kwargs)) + + return wrapped + + +def async_to_custom_raw_response_wrapper( + func: Callable[P, Awaitable[object]], + response_cls: type[_AsyncAPIResponseT], +) -> Callable[P, Awaitable[_AsyncAPIResponseT]]: + """Higher order function that takes one of our bound API methods and an `APIResponse` class + and wraps the method to support returning the given response class directly. + + Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])` + """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]: + extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "raw" + extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls kwargs["extra_headers"] = extra_headers - return cast(APIResponse[R], await func(*args, **kwargs)) + return cast(Awaitable[_AsyncAPIResponseT], func(*args, **kwargs)) return wrapped + + +def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type: + """Given a type like `APIResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(APIResponse[bytes]): + ... + + extract_response_type(MyResponse) -> bytes + ``` + """ + return extract_type_var_from_base( + typ, + generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse)), + index=0, + ) diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index 095746630b..641c3a7a72 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -2,49 +2,55 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any, Generic, Iterator, AsyncIterator -from typing_extensions import override +import inspect +from types import TracebackType +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast +from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable import httpx -from ._types import ResponseT -from ._utils import is_mapping +from ._utils import is_mapping, extract_type_var_from_base from ._exceptions import APIError if TYPE_CHECKING: - from ._base_client import SyncAPIClient, AsyncAPIClient + from ._client import OpenAI, AsyncOpenAI -class Stream(Generic[ResponseT]): +_T = TypeVar("_T") + + +class Stream(Generic[_T]): """Provides the core interface to iterate over a synchronous stream response.""" response: httpx.Response + _decoder: SSEBytesDecoder + def __init__( self, *, - cast_to: type[ResponseT], + cast_to: type[_T], response: httpx.Response, - client: SyncAPIClient, + client: OpenAI, ) -> None: self.response = response self._cast_to = cast_to self._client = client - self._decoder = SSEDecoder() + self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() - def __next__(self) -> ResponseT: + def __next__(self) -> _T: return self._iterator.__next__() - def __iter__(self) -> Iterator[ResponseT]: + def __iter__(self) -> Iterator[_T]: for item in self._iterator: yield item def _iter_events(self) -> 
Iterator[ServerSentEvent]: - yield from self._decoder.iter(self.response.iter_lines()) + yield from self._decoder.iter_bytes(self.response.iter_bytes()) - def __stream__(self) -> Iterator[ResponseT]: - cast_to = self._cast_to + def __stream__(self) -> Iterator[_T]: + cast_to = cast(Any, self._cast_to) response = self.response process_data = self._client._process_response_data iterator = self._iter_events() @@ -53,53 +59,100 @@ def __stream__(self) -> Iterator[ResponseT]: if sse.data.startswith("[DONE]"): break - if sse.event is None: + if sse.event is None or sse.event.startswith("response.") or sse.event.startswith('transcript.'): data = sse.json() if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIError( - message="An error ocurred during streaming", + message=message, request=self.response.request, body=data["error"], ) yield process_data(data=data, cast_to=cast_to, response=response) + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + # Ensure the entire stream is consumed - for sse in iterator: + for _sse in iterator: ... + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """ + Close the response and release the connection. -class AsyncStream(Generic[ResponseT]): + Automatically called if the response body is read to completion. 
+ """ + self.response.close() + + +class AsyncStream(Generic[_T]): """Provides the core interface to iterate over an asynchronous stream response.""" response: httpx.Response + _decoder: SSEDecoder | SSEBytesDecoder + def __init__( self, *, - cast_to: type[ResponseT], + cast_to: type[_T], response: httpx.Response, - client: AsyncAPIClient, + client: AsyncOpenAI, ) -> None: self.response = response self._cast_to = cast_to self._client = client - self._decoder = SSEDecoder() + self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() - async def __anext__(self) -> ResponseT: + async def __anext__(self) -> _T: return await self._iterator.__anext__() - async def __aiter__(self) -> AsyncIterator[ResponseT]: + async def __aiter__(self) -> AsyncIterator[_T]: async for item in self._iterator: yield item async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: - async for sse in self._decoder.aiter(self.response.aiter_lines()): + async for sse in self._decoder.aiter_bytes(self.response.aiter_bytes()): yield sse - async def __stream__(self) -> AsyncIterator[ResponseT]: - cast_to = self._cast_to + async def __stream__(self) -> AsyncIterator[_T]: + cast_to = cast(Any, self._cast_to) response = self.response process_data = self._client._process_response_data iterator = self._iter_events() @@ -108,21 +161,66 @@ async def __stream__(self) -> AsyncIterator[ResponseT]: if sse.data.startswith("[DONE]"): break - if sse.event is None: + if sse.event is None or sse.event.startswith("response.") or sse.event.startswith('transcript.'): data = sse.json() if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIError( - message="An error ocurred during streaming", + message=message, request=self.response.request, body=data["error"], ) yield process_data(data=data, cast_to=cast_to, response=response) + else: + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + # Ensure the entire stream is consumed - async for sse in iterator: + async for _sse in iterator: ... + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. 
+ """ + await self.response.aclose() + class ServerSentEvent: def __init__( @@ -177,21 +275,49 @@ def __init__(self) -> None: self._last_event_id = None self._retry = None - def iter(self, iterator: Iterator[str]) -> Iterator[ServerSentEvent]: - """Given an iterator that yields lines, iterate over it & yield every event encountered""" - for line in iterator: - line = line.rstrip("\n") - sse = self.decode(line) - if sse is not None: - yield sse - - async def aiter(self, iterator: AsyncIterator[str]) -> AsyncIterator[ServerSentEvent]: - """Given an async iterator that yields lines, iterate over it & yield every event encountered""" - async for line in iterator: - line = line.rstrip("\n") - sse = self.decode(line) - if sse is not None: - yield sse + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + for chunk in self._iter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + def _iter_chunks(self, iterator: Iterator[bytes]) -> Iterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data + + async def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + async for chunk in self._aiter_chunks(iterator): + # Split before decoding so splitlines() only uses \r and \n + for raw_line in chunk.splitlines(): + line = raw_line.decode("utf-8") + sse = self.decode(line) + if sse: + yield sse + + async def _aiter_chunks(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[bytes]: + """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks""" + data = b"" + async for chunk in iterator: + for line in chunk.splitlines(keepends=True): + data += line + if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")): + yield data + data = b"" + if data: + yield data def decode(self, line: str) -> ServerSentEvent | None: # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation # noqa: E501 @@ -240,3 +366,45 @@ def decode(self, line: str) -> ServerSentEvent | None: pass # Field is ignored. return None + + +@runtime_checkable +class SSEBytesDecoder(Protocol): + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]: + """Given an iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... + + def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]: + """Given an async iterator that yields raw binary data, iterate over it & yield every event encountered""" + ... 
+ + +def is_stream_class_type(typ: type) -> TypeGuard[type[Stream[object]] | type[AsyncStream[object]]]: + """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`""" + origin = get_origin(typ) or typ + return inspect.isclass(origin) and issubclass(origin, (Stream, AsyncStream)) + + +def extract_stream_chunk_type( + stream_cls: type, + *, + failure_message: str | None = None, +) -> type: + """Given a type like `Stream[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyStream(Stream[bytes]): + ... + + extract_stream_chunk_type(MyStream) -> bytes + ``` + """ + from ._base_client import Stream, AsyncStream + + return extract_type_var_from_base( + stream_cls, + index=0, + generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)), + failure_message=failure_message, + ) diff --git a/src/openai/_types.py b/src/openai/_types.py index 9e962a1078..a5cf207aa3 100644 --- a/src/openai/_types.py +++ b/src/openai/_types.py @@ -1,7 +1,6 @@ from __future__ import annotations from os import PathLike -from abc import ABC, abstractmethod from typing import ( IO, TYPE_CHECKING, @@ -14,25 +13,19 @@ Mapping, TypeVar, Callable, - Iterator, Optional, Sequence, - AsyncIterator, -) -from typing_extensions import ( - Literal, - Protocol, - TypeAlias, - TypedDict, - override, - runtime_checkable, ) +from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable +import httpx import pydantic from httpx import URL, Proxy, Timeout, Response, BaseTransport, AsyncBaseTransport if TYPE_CHECKING: from ._models import BaseModel + from ._response import APIResponse, AsyncAPIResponse + from ._legacy_response import HttpxBinaryResponseContent Transport = BaseTransport AsyncTransport = AsyncBaseTransport @@ -43,168 +36,15 @@ _T = TypeVar("_T") -class BinaryResponseContent(ABC): - def __init__( - self, - response: Any, - ) -> None: - ... - - @property - @abstractmethod - def content(self) -> bytes: - pass - - @property - @abstractmethod - def text(self) -> str: - pass - - @property - @abstractmethod - def encoding(self) -> Optional[str]: - """ - Return an encoding to use for decoding the byte content into text. - The priority for determining this is given by... - - * `.encoding = <>` has been set explicitly. - * The encoding as specified by the charset parameter in the Content-Type header. - * The encoding as determined by `default_encoding`, which may either be - a string like "utf-8" indicating the encoding to use, or may be a callable - which enables charset autodetection. - """ - pass - - @property - @abstractmethod - def charset_encoding(self) -> Optional[str]: - """ - Return the encoding, as specified by the Content-Type header. - """ - pass - - @abstractmethod - def json(self, **kwargs: Any) -> Any: - pass - - @abstractmethod - def read(self) -> bytes: - """ - Read and return the response content. - """ - pass - - @abstractmethod - def iter_bytes(self, chunk_size: Optional[int] = None) -> Iterator[bytes]: - """ - A byte-iterator over the decoded response content. - This allows us to handle gzip, deflate, and brotli encoded responses. - """ - pass - - @abstractmethod - def iter_text(self, chunk_size: Optional[int] = None) -> Iterator[str]: - """ - A str-iterator over the decoded response content - that handles both gzip, deflate, etc but also detects the content's - string encoding. 
- """ - pass - - @abstractmethod - def iter_lines(self) -> Iterator[str]: - pass - - @abstractmethod - def iter_raw(self, chunk_size: Optional[int] = None) -> Iterator[bytes]: - """ - A byte-iterator over the raw response content. - """ - pass - - @abstractmethod - def stream_to_file( - self, - file: str | PathLike[str], - *, - chunk_size: int | None = None, - ) -> None: - """ - Stream the output to the given file. - """ - pass - - @abstractmethod - def close(self) -> None: - """ - Close the response and release the connection. - Automatically called if the response body is read to completion. - """ - pass - - @abstractmethod - async def aread(self) -> bytes: - """ - Read and return the response content. - """ - pass - - @abstractmethod - async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]: - """ - A byte-iterator over the decoded response content. - This allows us to handle gzip, deflate, and brotli encoded responses. - """ - pass - - @abstractmethod - async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]: - """ - A str-iterator over the decoded response content - that handles both gzip, deflate, etc but also detects the content's - string encoding. - """ - pass - - @abstractmethod - async def aiter_lines(self) -> AsyncIterator[str]: - pass - - @abstractmethod - async def aiter_raw(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]: - """ - A byte-iterator over the raw response content. - """ - pass - - @abstractmethod - async def astream_to_file( - self, - file: str | PathLike[str], - *, - chunk_size: int | None = None, - ) -> None: - """ - Stream the output to the given file. - """ - pass - - @abstractmethod - async def aclose(self) -> None: - """ - Close the response and release the connection. - Automatically called if the response body is read to completion. - """ - pass - - # Approximates httpx internal ProxiesTypes and RequestFiles types # while adding support for `PathLike` instances ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]] ProxiesTypes = Union[str, Proxy, ProxiesDict] if TYPE_CHECKING: + Base64FileInput = Union[IO[bytes], PathLike[str]] FileContent = Union[IO[bytes], bytes, PathLike[str]] else: + Base64FileInput = Union[IO[bytes], PathLike] FileContent = Union[IO[bytes], bytes, PathLike] # PathLike is not subscriptable in Python 3.8. FileTypes = Union[ # file (or bytes) @@ -263,11 +103,6 @@ class RequestOptions(TypedDict, total=False): idempotency_key: str -# Sentinel class used when the response type is an object with an unknown schema -class UnknownResponse: - ... - - # Sentinel class used until PEP 0661 is accepted class NotGiven: """ @@ -279,9 +114,10 @@ class NotGiven: ```py def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ... - get(timeout=1) # 1s timeout - get(timeout=None) # No timeout - get() # Default timeout behavior, which may not be statically known at the method definition. + + get(timeout=1) # 1s timeout + get(timeout=None) # No timeout + get() # Default timeout behavior, which may not be statically known at the method definition. 
``` """ @@ -303,14 +139,14 @@ class Omit: ```py # as the default `Content-Type` header is `application/json` that will be sent - client.post('/upload/files', files={'file': b'my raw file content'}) + client.post("/upload/files", files={"file": b"my raw file content"}) # you can't explicitly override the header as it has to be dynamically generated # to look something like: 'multipart/form-data; boundary=0d8382fcf5f8c3be01ca2e11002d2983' - client.post(..., headers={'Content-Type': 'multipart/form-data'}) + client.post(..., headers={"Content-Type": "multipart/form-data"}) # instead you can remove the default `application/json` header by passing Omit - client.post(..., headers={'Content-Type': Omit()}) + client.post(..., headers={"Content-Type": Omit()}) ``` """ @@ -326,29 +162,58 @@ def build( *, response: Response, data: object, - ) -> _T: - ... + ) -> _T: ... Headers = Mapping[str, Union[str, Omit]] class HeadersLikeProtocol(Protocol): - def get(self, __key: str) -> str | None: - ... + def get(self, __key: str) -> str | None: ... HeadersLike = Union[Headers, HeadersLikeProtocol] ResponseT = TypeVar( "ResponseT", - bound="Union[str, None, BaseModel, List[Any], Dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol, BinaryResponseContent]", + bound=Union[ + object, + str, + None, + "BaseModel", + List[Any], + Dict[str, Any], + Response, + ModelBuilderProtocol, + "APIResponse[Any]", + "AsyncAPIResponse[Any]", + "HttpxBinaryResponseContent", + ], ) StrBytesIntFloat = Union[str, bytes, int, float] # Note: copied from Pydantic -# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 -IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" +# https://github.com/pydantic/pydantic/blob/6f31f8f68ef011f84357330186f603ff295312fd/pydantic/main.py#L79 +IncEx: TypeAlias = Union[Set[int], Set[str], Mapping[int, Union["IncEx", bool]], Mapping[str, Union["IncEx", bool]]] PostParser = Callable[[Any], Any] + + +@runtime_checkable +class InheritsGeneric(Protocol): + """Represents a type that has inherited from `Generic` + + The `__orig_bases__` property can be used to determine the resolved + type variable for a given base class. 
+ """ + + __orig_bases__: tuple[_GenericAlias] + + +class _GenericAlias(Protocol): + __origin__: type[object] + + +class HttpxSendArgs(TypedDict, total=False): + auth: httpx.Auth diff --git a/src/openai/_utils/__init__.py b/src/openai/_utils/__init__.py index d3397212de..bd01c088dc 100644 --- a/src/openai/_utils/__init__.py +++ b/src/openai/_utils/__init__.py @@ -1,36 +1,60 @@ +from ._logs import SensitiveHeadersFilter as SensitiveHeadersFilter +from ._sync import asyncify as asyncify from ._proxy import LazyProxy as LazyProxy -from ._utils import flatten as flatten -from ._utils import is_dict as is_dict -from ._utils import is_list as is_list -from ._utils import is_given as is_given -from ._utils import is_tuple as is_tuple -from ._utils import is_mapping as is_mapping -from ._utils import is_tuple_t as is_tuple_t -from ._utils import parse_date as parse_date -from ._utils import is_sequence as is_sequence -from ._utils import coerce_float as coerce_float -from ._utils import is_list_type as is_list_type -from ._utils import is_mapping_t as is_mapping_t -from ._utils import removeprefix as removeprefix -from ._utils import removesuffix as removesuffix -from ._utils import extract_files as extract_files -from ._utils import is_sequence_t as is_sequence_t -from ._utils import is_union_type as is_union_type -from ._utils import required_args as required_args -from ._utils import coerce_boolean as coerce_boolean -from ._utils import coerce_integer as coerce_integer -from ._utils import file_from_path as file_from_path -from ._utils import parse_datetime as parse_datetime -from ._utils import strip_not_given as strip_not_given -from ._utils import deepcopy_minimal as deepcopy_minimal -from ._utils import extract_type_arg as extract_type_arg -from ._utils import is_required_type as is_required_type -from ._utils import is_annotated_type as is_annotated_type -from ._utils import maybe_coerce_float as maybe_coerce_float -from ._utils import get_required_header as get_required_header -from ._utils import maybe_coerce_boolean as maybe_coerce_boolean -from ._utils import maybe_coerce_integer as maybe_coerce_integer -from ._utils import strip_annotated_type as strip_annotated_type -from ._transform import PropertyInfo as PropertyInfo -from ._transform import transform as transform -from ._transform import maybe_transform as maybe_transform +from ._utils import ( + flatten as flatten, + is_dict as is_dict, + is_list as is_list, + is_given as is_given, + is_tuple as is_tuple, + json_safe as json_safe, + lru_cache as lru_cache, + is_mapping as is_mapping, + is_tuple_t as is_tuple_t, + parse_date as parse_date, + is_iterable as is_iterable, + is_sequence as is_sequence, + coerce_float as coerce_float, + is_mapping_t as is_mapping_t, + removeprefix as removeprefix, + removesuffix as removesuffix, + extract_files as extract_files, + is_sequence_t as is_sequence_t, + required_args as required_args, + coerce_boolean as coerce_boolean, + coerce_integer as coerce_integer, + file_from_path as file_from_path, + parse_datetime as parse_datetime, + is_azure_client as is_azure_client, + strip_not_given as strip_not_given, + deepcopy_minimal as deepcopy_minimal, + get_async_library as get_async_library, + maybe_coerce_float as maybe_coerce_float, + get_required_header as get_required_header, + maybe_coerce_boolean as maybe_coerce_boolean, + maybe_coerce_integer as maybe_coerce_integer, + is_async_azure_client as is_async_azure_client, +) +from ._typing import ( + is_list_type as is_list_type, + is_union_type as 
is_union_type, + extract_type_arg as extract_type_arg, + is_iterable_type as is_iterable_type, + is_required_type as is_required_type, + is_annotated_type as is_annotated_type, + is_type_alias_type as is_type_alias_type, + strip_annotated_type as strip_annotated_type, + extract_type_var_from_base as extract_type_var_from_base, +) +from ._streams import consume_sync_iterator as consume_sync_iterator, consume_async_iterator as consume_async_iterator +from ._transform import ( + PropertyInfo as PropertyInfo, + transform as transform, + async_transform as async_transform, + maybe_transform as maybe_transform, + async_maybe_transform as async_maybe_transform, +) +from ._reflection import ( + function_has_argument as function_has_argument, + assert_signatures_in_sync as assert_signatures_in_sync, +) diff --git a/src/openai/_utils/_logs.py b/src/openai/_utils/_logs.py index e5113fd8c0..376946933c 100644 --- a/src/openai/_utils/_logs.py +++ b/src/openai/_utils/_logs.py @@ -1,10 +1,16 @@ import os import logging +from typing_extensions import override + +from ._utils import is_dict logger: logging.Logger = logging.getLogger("openai") httpx_logger: logging.Logger = logging.getLogger("httpx") +SENSITIVE_HEADERS = {"api-key", "authorization"} + + def _basic_config() -> None: # e.g. [2023-10-05 14:12:26 - openai._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK" logging.basicConfig( @@ -23,3 +29,14 @@ def setup_logging() -> None: _basic_config() logger.setLevel(logging.INFO) httpx_logger.setLevel(logging.INFO) + + +class SensitiveHeadersFilter(logging.Filter): + @override + def filter(self, record: logging.LogRecord) -> bool: + if is_dict(record.args) and "headers" in record.args and is_dict(record.args["headers"]): + headers = record.args["headers"] = {**record.args["headers"]} + for header in headers: + if str(header).lower() in SENSITIVE_HEADERS: + headers[header] = "" + return True diff --git a/src/openai/_utils/_proxy.py b/src/openai/_utils/_proxy.py index aa934a3fbc..0f239a33c6 100644 --- a/src/openai/_utils/_proxy.py +++ b/src/openai/_utils/_proxy.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from typing import Generic, TypeVar, Iterable, cast -from typing_extensions import ClassVar, override +from typing_extensions import override T = TypeVar("T") @@ -10,52 +10,56 @@ class LazyProxy(Generic[T], ABC): """Implements data methods to pretend that an instance is another instance. - This includes forwarding attribute access and othe methods. + This includes forwarding attribute access and other methods. """ - should_cache: ClassVar[bool] = False - - def __init__(self) -> None: - self.__proxied: T | None = None + # Note: we have to special case proxies that themselves return proxies + # to support using a proxy as a catch-all for any random access, e.g. 
`proxy.foo.bar.baz` def __getattr__(self, attr: str) -> object: - return getattr(self.__get_proxied__(), attr) + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied # pyright: ignore + return getattr(proxied, attr) @override def __repr__(self) -> str: + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied.__class__.__name__ return repr(self.__get_proxied__()) @override def __str__(self) -> str: - return str(self.__get_proxied__()) + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return proxied.__class__.__name__ + return str(proxied) @override def __dir__(self) -> Iterable[str]: - return self.__get_proxied__().__dir__() + proxied = self.__get_proxied__() + if isinstance(proxied, LazyProxy): + return [] + return proxied.__dir__() @property # type: ignore @override - def __class__(self) -> type: - return self.__get_proxied__().__class__ + def __class__(self) -> type: # pyright: ignore + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) + if issubclass(type(proxied), LazyProxy): + return type(proxied) + return proxied.__class__ def __get_proxied__(self) -> T: - if not self.should_cache: - return self.__load__() - - proxied = self.__proxied - if proxied is not None: - return proxied - - self.__proxied = proxied = self.__load__() - return proxied - - def __set_proxied__(self, value: T) -> None: - self.__proxied = value + return self.__load__() def __as_proxied__(self) -> T: """Helper method that returns the current proxy, typed as the loaded object""" return cast(T, self) @abstractmethod - def __load__(self) -> T: - ... + def __load__(self) -> T: ... diff --git a/src/openai/_utils/_reflection.py b/src/openai/_utils/_reflection.py new file mode 100644 index 0000000000..bdaca29e4a --- /dev/null +++ b/src/openai/_utils/_reflection.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import inspect +from typing import Any, Callable + + +def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: + """Returns whether or not the given function has a specific parameter""" + sig = inspect.signature(func) + return arg_name in sig.parameters + + +def assert_signatures_in_sync( + source_func: Callable[..., Any], + check_func: Callable[..., Any], + *, + exclude_params: set[str] = set(), + description: str = "", +) -> None: + """Ensure that the signature of the second function matches the first.""" + + check_sig = inspect.signature(check_func) + source_sig = inspect.signature(source_func) + + errors: list[str] = [] + + for name, source_param in source_sig.parameters.items(): + if name in exclude_params: + continue + + custom_param = check_sig.parameters.get(name) + if not custom_param: + errors.append(f"the `{name}` param is missing") + continue + + if custom_param.annotation != source_param.annotation: + errors.append( + f"types for the `{name}` param are do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}" + ) + continue + + if errors: + raise AssertionError( + f"{len(errors)} errors encountered when comparing signatures{description}:\n\n" + "\n\n".join(errors) + ) diff --git a/src/openai/_utils/_resources_proxy.py b/src/openai/_utils/_resources_proxy.py new file mode 100644 index 0000000000..e5b9ec7a37 --- /dev/null +++ b/src/openai/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class 
ResourcesProxy(LazyProxy[Any]): + """A proxy for the `openai.resources` module. + + This is used so that we can lazily import `openai.resources` only when + needed *and* so that users can just import `openai` and reference `openai.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("openai.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/openai/_utils/_streams.py b/src/openai/_utils/_streams.py new file mode 100644 index 0000000000..f4a0208f01 --- /dev/null +++ b/src/openai/_utils/_streams.py @@ -0,0 +1,12 @@ +from typing import Any +from typing_extensions import Iterator, AsyncIterator + + +def consume_sync_iterator(iterator: Iterator[Any]) -> None: + for _ in iterator: + ... + + +async def consume_async_iterator(iterator: AsyncIterator[Any]) -> None: + async for _ in iterator: + ... diff --git a/src/openai/_utils/_sync.py b/src/openai/_utils/_sync.py new file mode 100644 index 0000000000..ad7ec71b76 --- /dev/null +++ b/src/openai/_utils/_sync.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import sys +import asyncio +import functools +import contextvars +from typing import Any, TypeVar, Callable, Awaitable +from typing_extensions import ParamSpec + +import anyio +import sniffio +import anyio.to_thread + +T_Retval = TypeVar("T_Retval") +T_ParamSpec = ParamSpec("T_ParamSpec") + + +if sys.version_info >= (3, 9): + _asyncio_to_thread = asyncio.to_thread +else: + # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread + # for Python 3.8 support + async def _asyncio_to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs + ) -> Any: + """Asynchronously run function *func* in a separate thread. + + Any *args and **kwargs supplied for this function are directly passed + to *func*. Also, the current :class:`contextvars.Context` is propagated, + allowing context variables from the main thread to be accessed in the + separate thread. + + Returns a coroutine that can be awaited to get the eventual result of *func*. + """ + loop = asyncio.events.get_running_loop() + ctx = contextvars.copy_context() + func_call = functools.partial(ctx.run, func, *args, **kwargs) + return await loop.run_in_executor(None, func_call) + + +async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs +) -> T_Retval: + if sniffio.current_async_library() == "asyncio": + return await _asyncio_to_thread(func, *args, **kwargs) + + return await anyio.to_thread.run_sync( + functools.partial(func, *args, **kwargs), + ) + + +# inspired by `asyncer`, https://github.com/tiangolo/asyncer +def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: + """ + Take a blocking function and create an async one that receives the same + positional and keyword arguments. For python version 3.9 and above, it uses + asyncio.to_thread to run the function in a separate thread. For python version + 3.8, it uses locally defined copy of the asyncio.to_thread function which was + introduced in python 3.9. + + Usage: + + ```python + def blocking_func(arg1, arg2, kwarg1=None): + # blocking code + return result + + + result = asyncify(blocking_function)(arg1, arg2, kwarg1=value1) + ``` + + ## Arguments + + `function`: a blocking regular callable (e.g. 
a function) + + ## Return + + An async function that takes the same positional and keyword arguments as the + original one, that when called runs the same original function in a thread worker + and returns the result. + """ + + async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: + return await to_thread(function, *args, **kwargs) + + return wrapper diff --git a/src/openai/_utils/_transform.py b/src/openai/_utils/_transform.py index 769f7362b9..b0cc20a735 100644 --- a/src/openai/_utils/_transform.py +++ b/src/openai/_utils/_transform.py @@ -1,22 +1,33 @@ from __future__ import annotations +import io +import base64 +import pathlib from typing import Any, Mapping, TypeVar, cast from datetime import date, datetime -from typing_extensions import Literal, get_args, override, get_type_hints +from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints +import anyio import pydantic from ._utils import ( is_list, + is_given, + lru_cache, is_mapping, + is_iterable, +) +from .._files import is_base64_file_input +from ._typing import ( is_list_type, is_union_type, extract_type_arg, + is_iterable_type, is_required_type, is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -25,7 +36,7 @@ # TODO: ensure works correctly with forward references in all cases -PropertyFormat = Literal["iso8601", "custom"] +PropertyFormat = Literal["iso8601", "base64", "custom"] class PropertyInfo: @@ -42,6 +53,7 @@ class MyParams(TypedDict): alias: str | None format: PropertyFormat | None format_template: str | None + discriminator: str | None def __init__( self, @@ -49,14 +61,16 @@ def __init__( alias: str | None = None, format: PropertyFormat | None = None, format_template: str | None = None, + discriminator: str | None = None, ) -> None: self.alias = alias self.format = format self.format_template = format_template + self.discriminator = discriminator @override def __repr__(self) -> str: - return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}')" + return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" def maybe_transform( @@ -81,9 +95,10 @@ def transform( ```py class Params(TypedDict, total=False): - card_id: Required[Annotated[str, PropertyInfo(alias='cardID')]] + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + - transformed = transform({'card_id': ''}, Params) + transformed = transform({"card_id": ""}, Params) # {'cardID': ''} ``` @@ -95,6 +110,7 @@ class Params(TypedDict, total=False): return cast(_T, transformed) +@lru_cache(maxsize=8096) def _get_annotated_type(type_: type) -> type | None: """If the given type is an `Annotated` type then it is returned, if not `None` is returned. @@ -113,7 +129,7 @@ def _get_annotated_type(type_: type) -> type | None: def _maybe_transform_key(key: str, type_: type) -> str: """Transform the given `data` based on the annotations provided in `type_`. - Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata. 
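For context, a minimal usage sketch of the transform machinery above; `PropertyInfo` and `maybe_transform` are re-exported from the private `openai._utils` package in this diff, so the import path is an assumption about internal layout rather than public API.

```py
from datetime import datetime

from typing_extensions import Annotated, Required, TypedDict

from openai._utils import PropertyInfo, maybe_transform


class Params(TypedDict, total=False):
    card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
    created_at: Annotated[datetime, PropertyInfo(format="iso8601")]


body = maybe_transform({"card_id": "abc", "created_at": datetime(2024, 1, 1)}, Params)
print(body)  # {'cardID': 'abc', 'created_at': '2024-01-01T00:00:00'}
```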
""" annotated_type = _get_annotated_type(type_) if annotated_type is None: @@ -129,6 +145,10 @@ def _maybe_transform_key(key: str, type_: type) -> str: return key +def _no_transform_needed(annotation: type) -> bool: + return annotation == float or annotation == int + + def _transform_recursive( data: object, *, @@ -151,11 +171,35 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) - if is_list_type(stripped_type) and is_list(data): + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. + if isinstance(data, dict): + return cast(object, data) + inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. + # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] if is_union_type(stripped_type): @@ -168,13 +212,9 @@ def _transform_recursive( return data if isinstance(data, pydantic.BaseModel): - return model_dump(data, exclude_unset=True) - - return _transform_value(data, annotation) - + return model_dump(data, exclude_unset=True, mode="json") -def _transform_value(data: object, type_: type) -> object: - annotated_type = _get_annotated_type(type_) + annotated_type = _get_annotated_type(annotation) if annotated_type is None: return data @@ -195,6 +235,22 @@ def _format_data(data: object, format_: PropertyFormat, format_template: str | N if format_ == "custom" and format_template is not None: return data.strftime(format_template) + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = data.read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + return data @@ -205,6 +261,11 @@ def _transform_typeddict( result: dict[str, object] = {} annotations = get_type_hints(expected_type, include_extras=True) for key, value in data.items(): + if not is_given(value): + # we don't need to include `NotGiven` values here as they'll + # be stripped out before the request is sent anyway + continue + type_ = annotations.get(key) if type_ is None: # we do not have a type annotation for this field, leave it as is @@ -212,3 +273,175 @@ def _transform_typeddict( else: result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_) return result + + +async def async_maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over 
`async_transform()` that allows `None` to be passed. + + See `async_transform()` for more details. + """ + if data is None: + return None + return await async_transform(data, expected_type) + + +async def async_transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +async def _async_transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type + if is_typeddict(stripped_type) and is_mapping(data): + return await _async_transform_typeddict(data, stripped_type) + + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. + if isinstance(data, dict): + return cast(object, data) + + inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. + # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. 
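The new `base64` property format can be exercised the same way; the sketch below assumes the internal `openai._utils` exports shown in this diff and uses an invented `UploadParams` shape purely for illustration.

```py
import base64
import pathlib
import tempfile

from typing_extensions import Annotated, TypedDict

from openai._utils import PropertyInfo, maybe_transform


class UploadParams(TypedDict, total=False):
    data: Annotated[pathlib.Path, PropertyInfo(format="base64")]


with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b"hello")
    path = pathlib.Path(f.name)

# the transform step reads the file and base64-encodes its contents
body = maybe_transform({"data": path}, UploadParams)
assert body["data"] == base64.b64encode(b"hello").decode("ascii")
```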
+ for subtype in get_args(stripped_type): + data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True, mode="json") + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return await _async_format_data(data, annotation.format, annotation.format_template) + + return data + + +async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, (date, datetime)): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = await anyio.Path(data).read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +async def _async_transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + if not is_given(value): + # we don't need to include `NotGiven` values here as they'll + # be stripped out before the request is sent anyway + continue + + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) + return result + + +@lru_cache(maxsize=8096) +def get_type_hints( + obj: Any, + globalns: dict[str, Any] | None = None, + localns: Mapping[str, Any] | None = None, + include_extras: bool = False, +) -> dict[str, Any]: + return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras) diff --git a/src/openai/_utils/_typing.py b/src/openai/_utils/_typing.py new file mode 100644 index 0000000000..1bac9542e2 --- /dev/null +++ b/src/openai/_utils/_typing.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import sys +import typing +import typing_extensions +from typing import Any, TypeVar, Iterable, cast +from collections import abc as _c_abc +from typing_extensions import ( + TypeIs, + Required, + Annotated, + get_args, + get_origin, +) + +from ._utils import lru_cache +from .._types import InheritsGeneric +from .._compat import is_union as _is_union + + +def is_annotated_type(typ: type) -> bool: + return get_origin(typ) == Annotated + + +def is_list_type(typ: type) -> bool: + return (get_origin(typ) or typ) == list + + +def is_iterable_type(typ: type) -> bool: + """If the given type is `typing.Iterable[T]`""" + origin = get_origin(typ) or typ + return origin == Iterable or origin == _c_abc.Iterable + + +def is_union_type(typ: type) -> bool: + return _is_union(get_origin(typ)) + + +def is_required_type(typ: type) -> bool: + return get_origin(typ) 
== Required + + +def is_typevar(typ: type) -> bool: + # type ignore is required because type checkers + # think this expression will always return False + return type(typ) == TypeVar # type: ignore + + +_TYPE_ALIAS_TYPES: tuple[type[typing_extensions.TypeAliasType], ...] = (typing_extensions.TypeAliasType,) +if sys.version_info >= (3, 12): + _TYPE_ALIAS_TYPES = (*_TYPE_ALIAS_TYPES, typing.TypeAliasType) + + +def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]: + """Return whether the provided argument is an instance of `TypeAliasType`. + + ```python + type Int = int + is_type_alias_type(Int) + # > True + Str = TypeAliasType("Str", str) + is_type_alias_type(Str) + # > True + ``` + """ + return isinstance(tp, _TYPE_ALIAS_TYPES) + + +# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] +@lru_cache(maxsize=8096) +def strip_annotated_type(typ: type) -> type: + if is_required_type(typ) or is_annotated_type(typ): + return strip_annotated_type(cast(type, get_args(typ)[0])) + + return typ + + +def extract_type_arg(typ: type, index: int) -> type: + args = get_args(typ) + try: + return cast(type, args[index]) + except IndexError as err: + raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err + + +def extract_type_var_from_base( + typ: type, + *, + generic_bases: tuple[type, ...], + index: int, + failure_message: str | None = None, +) -> type: + """Given a type like `Foo[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(Foo[bytes]): + ... + + extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes + ``` + + And where a generic subclass is given: + ```py + _T = TypeVar('_T') + class MyResponse(Foo[_T]): + ... + + extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes + ``` + """ + cls = cast(object, get_origin(typ) or typ) + if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains] + # we're given the class directly + return extract_type_arg(typ, index) + + # if a subclass is given + # --- + # this is needed as __orig_bases__ is not present in the typeshed stubs + # because it is intended to be for internal use only, however there does + # not seem to be a way to resolve generic TypeVars for inherited subclasses + # without using it. + if isinstance(cls, InheritsGeneric): + target_base_class: Any | None = None + for base in cls.__orig_bases__: + if base.__origin__ in generic_bases: + target_base_class = base + break + + if target_base_class is None: + raise RuntimeError( + "Could not find the generic base class;\n" + "This should never happen;\n" + f"Does {cls} inherit from one of {generic_bases} ?" + ) + + extracted = extract_type_arg(target_base_class, index) + if is_typevar(extracted): + # If the extracted type argument is itself a type variable + # then that means the subclass itself is generic, so we have + # to resolve the type argument from the class itself, not + # the base class. + # + # Note: if there is more than 1 type argument, the subclass could + # change the ordering of the type arguments, this is not currently + # supported. 
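A short illustration of the typing helpers above, assuming the `openai._utils` re-exports added in this diff:

```py
from typing import List

from typing_extensions import Annotated, Required

from openai._utils import extract_type_arg, is_list_type, strip_annotated_type

typ = Required[Annotated[List[int], "metadata"]]
inner = strip_annotated_type(typ)  # List[int]
print(is_list_type(inner))         # True
print(extract_type_arg(inner, 0))  # <class 'int'>
```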
+ return extract_type_arg(typ, index) + + return extracted + + raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}") diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index 4b51dcb2e8..1e7d013b51 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -5,6 +5,7 @@ import inspect import functools from typing import ( + TYPE_CHECKING, Any, Tuple, Mapping, @@ -16,12 +17,13 @@ overload, ) from pathlib import Path -from typing_extensions import Required, Annotated, TypeGuard, get_args, get_origin +from datetime import date, datetime +from typing_extensions import TypeGuard -from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike -from .._compat import is_union as _is_union -from .._compat import parse_date as parse_date -from .._compat import parse_datetime as parse_datetime +import sniffio + +from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike +from .._compat import parse_date as parse_date, parse_datetime as parse_datetime _T = TypeVar("_T") _TupleT = TypeVar("_TupleT", bound=Tuple[object, ...]) @@ -29,6 +31,9 @@ _SequenceT = TypeVar("_SequenceT", bound=Sequence[object]) CallableT = TypeVar("CallableT", bound=Callable[..., Any]) +if TYPE_CHECKING: + from ..lib.azure import AzureOpenAI, AsyncAzureOpenAI + def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: return [item for sublist in t for item in sublist] @@ -71,8 +76,16 @@ def _extract_items( from .._files import assert_is_file_content # We have exhausted the path, return the entry we found. - assert_is_file_content(obj, key=flattened_key) assert flattened_key is not None + + if is_list(obj): + files: list[tuple[str, FileTypes]] = [] + for entry in obj: + assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "") + files.append((flattened_key + "[]", cast(FileTypes, entry))) + return files + + assert_is_file_content(obj, key=flattened_key) return [(flattened_key, cast(FileTypes, obj))] index += 1 @@ -164,36 +177,8 @@ def is_list(obj: object) -> TypeGuard[list[object]]: return isinstance(obj, list) -def is_annotated_type(typ: type) -> bool: - return get_origin(typ) == Annotated - - -def is_list_type(typ: type) -> bool: - return (get_origin(typ) or typ) == list - - -def is_union_type(typ: type) -> bool: - return _is_union(get_origin(typ)) - - -def is_required_type(typ: type) -> bool: - return get_origin(typ) == Required - - -# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] -def strip_annotated_type(typ: type) -> type: - if is_required_type(typ) or is_annotated_type(typ): - return strip_annotated_type(cast(type, get_args(typ)[0])) - - return typ - - -def extract_type_arg(typ: type, index: int) -> type: - args = get_args(typ) - try: - return cast(type, args[index]) - except IndexError: - raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") +def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: + return isinstance(obj, Iterable) def deepcopy_minimal(item: _T) -> _T: @@ -228,7 +213,7 @@ def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> s def quote(string: str) -> str: """Add single quotation marks around the given string. 
Does *not* do any escaping.""" - return "'" + string + "'" + return f"'{string}'" def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: @@ -239,18 +224,17 @@ def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: Example usage: ```py @overload - def foo(*, a: str) -> str: - ... + def foo(*, a: str) -> str: ... + @overload - def foo(*, b: bool) -> str: - ... + def foo(*, b: bool) -> str: ... + # This enforces the same constraints that a static type checker would # i.e. that either a or b must be passed to the function - @required_args(['a'], ['b']) - def foo(*, a: str | None = None, b: bool | None = None) -> str: - ... + @required_args(["a"], ["b"]) + def foo(*, a: str | None = None, b: bool | None = None) -> str: ... ``` """ @@ -273,7 +257,9 @@ def wrapper(*args: object, **kwargs: object) -> object: try: given_params.add(positional[i]) except IndexError: - raise TypeError(f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given") + raise TypeError( + f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given" + ) from None for key in kwargs.keys(): given_params.add(key) @@ -289,6 +275,8 @@ def wrapper(*args: object, **kwargs: object) -> object: ) msg = f"Missing required arguments; Expected either {variations} arguments to be given" else: + assert len(variants) > 0 + # TODO: this error message is not deterministic missing = list(set(variants[0]) - given_params) if len(missing) > 1: @@ -308,18 +296,15 @@ def wrapper(*args: object, **kwargs: object) -> object: @overload -def strip_not_given(obj: None) -> None: - ... +def strip_not_given(obj: None) -> None: ... @overload -def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: - ... +def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ... @overload -def strip_not_given(obj: object) -> object: - ... +def strip_not_given(obj: object) -> object: ... def strip_not_given(obj: object | None) -> object: @@ -391,13 +376,13 @@ def file_from_path(path: str) -> FileTypes: def get_required_header(headers: HeadersLike, header: str) -> str: lower_header = header.lower() - if isinstance(headers, Mapping): - headers = cast(Headers, headers) - for k, v in headers.items(): + if is_mapping_t(headers): + # mypy doesn't understand the type narrowing here + for k, v in headers.items(): # type: ignore if k.lower() == lower_header and isinstance(v, str): return v - """ to deal with the case where the header looks like Stainless-Event-Id """ + # to deal with the case where the header looks like Stainless-Event-Id intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize()) for normalized_header in [header, lower_header, header.upper(), intercaps_header]: @@ -406,3 +391,48 @@ def get_required_header(headers: HeadersLike, header: str) -> str: return value raise ValueError(f"Could not find {header} header") + + +def get_async_library() -> str: + try: + return sniffio.current_async_library() + except Exception: + return "false" + + +def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]: + """A version of functools.lru_cache that retains the type signature + for the wrapped function arguments. 
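A small sketch of `json_safe` defined just below, which mirrors pydantic v2's `model_dump(mode="json")` for plain mappings and sequences; the import path again assumes the internal `openai._utils` layout shown in this diff.

```py
from datetime import datetime

from openai._utils import json_safe

payload = {"created": datetime(2024, 1, 1, 12, 0), "tags": ("a", "b")}
print(json_safe(payload))
# {'created': '2024-01-01T12:00:00', 'tags': ['a', 'b']}
```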
+ """ + wrapper = functools.lru_cache( # noqa: TID251 + maxsize=maxsize, + ) + return cast(Any, wrapper) # type: ignore[no-any-return] + + +def json_safe(data: object) -> object: + """Translates a mapping / sequence recursively in the same fashion + as `pydantic` v2's `model_dump(mode="json")`. + """ + if is_mapping(data): + return {json_safe(key): json_safe(value) for key, value in data.items()} + + if is_iterable(data) and not isinstance(data, (str, bytes, bytearray)): + return [json_safe(item) for item in data] + + if isinstance(data, (datetime, date)): + return data.isoformat() + + return data + + +def is_azure_client(client: object) -> TypeGuard[AzureOpenAI]: + from ..lib.azure import AzureOpenAI + + return isinstance(client, AzureOpenAI) + + +def is_async_azure_client(client: object) -> TypeGuard[AsyncAzureOpenAI]: + from ..lib.azure import AsyncAzureOpenAI + + return isinstance(client, AsyncAzureOpenAI) diff --git a/src/openai/_version.py b/src/openai/_version.py index 1ef6479491..9b430dfa8b 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.3.5" # x-release-please-version +__version__ = "1.78.1" # x-release-please-version diff --git a/src/openai/cli/_api/audio.py b/src/openai/cli/_api/audio.py index 90d21b9932..269c67df28 100644 --- a/src/openai/cli/_api/audio.py +++ b/src/openai/cli/_api/audio.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys from typing import TYPE_CHECKING, Any, Optional, cast from argparse import ArgumentParser @@ -7,6 +8,7 @@ from ..._types import NOT_GIVEN from .._models import BaseModel from .._progress import BufferReader +from ...types.audio import Transcription if TYPE_CHECKING: from argparse import _SubParsersAction @@ -65,30 +67,42 @@ def transcribe(args: CLITranscribeArgs) -> None: with open(args.file, "rb") as file_reader: buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - model = get_client().audio.transcriptions.create( - file=(args.file, buffer_reader), - model=args.model, - language=args.language or NOT_GIVEN, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - response_format=cast(Any, args.response_format), + model = cast( + "Transcription | str", + get_client().audio.transcriptions.create( + file=(args.file, buffer_reader), + model=args.model, + language=args.language or NOT_GIVEN, + temperature=args.temperature or NOT_GIVEN, + prompt=args.prompt or NOT_GIVEN, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + response_format=cast(Any, args.response_format), + ), ) - print_model(model) + if isinstance(model, str): + sys.stdout.write(model + "\n") + else: + print_model(model) @staticmethod def translate(args: CLITranslationArgs) -> None: with open(args.file, "rb") as file_reader: buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - model = get_client().audio.translations.create( - file=(args.file, buffer_reader), - model=args.model, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - response_format=cast(Any, 
args.response_format), + model = cast( + "Transcription | str", + get_client().audio.translations.create( + file=(args.file, buffer_reader), + model=args.model, + temperature=args.temperature or NOT_GIVEN, + prompt=args.prompt or NOT_GIVEN, + # casts required because the API is typed for enums + # but we don't want to validate that here for forwards-compat + response_format=cast(Any, args.response_format), + ), ) - print_model(model) + if isinstance(model, str): + sys.stdout.write(model + "\n") + else: + print_model(model) diff --git a/src/openai/cli/_api/chat/completions.py b/src/openai/cli/_api/chat/completions.py index c299741fe0..344eeff37c 100644 --- a/src/openai/cli/_api/chat/completions.py +++ b/src/openai/cli/_api/chat/completions.py @@ -100,13 +100,17 @@ def create(args: CLIChatCompletionCreateArgs) -> None: "messages": [ {"role": cast(Literal["user"], message.role), "content": message.content} for message in args.message ], - "n": args.n, - "temperature": args.temperature, - "top_p": args.top_p, - "stop": args.stop, # type checkers are not good at inferring union types so we have to set stream afterwards "stream": False, } + if args.temperature is not None: + params["temperature"] = args.temperature + if args.stop is not None: + params["stop"] = args.stop + if args.top_p is not None: + params["top_p"] = args.top_p + if args.n is not None: + params["n"] = args.n if args.stream: params["stream"] = args.stream # type: ignore if args.max_tokens is not None: diff --git a/src/openai/cli/_api/completions.py b/src/openai/cli/_api/completions.py index ce1036b224..cbdb35bf3a 100644 --- a/src/openai/cli/_api/completions.py +++ b/src/openai/cli/_api/completions.py @@ -57,7 +57,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: ) sub.add_argument( "--logprobs", - help="Include the log probabilites on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. If `logprobs` is 0, only the chosen tokens will have logprobs returned.", + help="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. 
If `logprobs` is 0, only the chosen tokens will have logprobs returned.", type=int, ) sub.add_argument( diff --git a/src/openai/cli/_api/image.py b/src/openai/cli/_api/image.py index e6149eeac4..3e2a0a90f1 100644 --- a/src/openai/cli/_api/image.py +++ b/src/openai/cli/_api/image.py @@ -14,6 +14,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub = subparser.add_parser("images.generate") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-p", "--prompt", type=str, required=True) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") @@ -21,6 +22,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs) sub = subparser.add_parser("images.edit") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-p", "--prompt", type=str, required=True) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument( @@ -42,6 +44,7 @@ def register(subparser: _SubParsersAction[ArgumentParser]) -> None: sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs) sub = subparser.add_parser("images.create_variation") + sub.add_argument("-m", "--model", type=str) sub.add_argument("-n", "--num-images", type=int, default=1) sub.add_argument( "-I", @@ -60,6 +63,7 @@ class CLIImageCreateArgs(BaseModel): num_images: int size: str response_format: str + model: NotGivenOr[str] = NOT_GIVEN class CLIImageCreateVariationArgs(BaseModel): @@ -67,6 +71,7 @@ class CLIImageCreateVariationArgs(BaseModel): num_images: int size: str response_format: str + model: NotGivenOr[str] = NOT_GIVEN class CLIImageEditArgs(BaseModel): @@ -76,12 +81,14 @@ class CLIImageEditArgs(BaseModel): response_format: str prompt: str mask: NotGivenOr[str] = NOT_GIVEN + model: NotGivenOr[str] = NOT_GIVEN class CLIImage: @staticmethod def create(args: CLIImageCreateArgs) -> None: image = get_client().images.generate( + model=args.model, prompt=args.prompt, n=args.num_images, # casts required because the API is typed for enums @@ -97,6 +104,7 @@ def create_variation(args: CLIImageCreateVariationArgs) -> None: buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") image = get_client().images.create_variation( + model=args.model, image=("image", buffer_reader), n=args.num_images, # casts required because the API is typed for enums @@ -118,6 +126,7 @@ def edit(args: CLIImageEditArgs) -> None: mask = BufferReader(file_reader.read(), desc="Mask progress") image = get_client().images.edit( + model=args.model, prompt=args.prompt, image=("image", buffer_reader), n=args.num_images, diff --git a/src/openai/cli/_cli.py b/src/openai/cli/_cli.py index 72e5c923bd..fd165f48ab 100644 --- a/src/openai/cli/_cli.py +++ b/src/openai/cli/_cli.py @@ -15,7 +15,6 @@ from .. 
import _ApiType, __version__ from ._api import register_commands from ._utils import can_use_http2 -from .._types import ProxiesDict from ._errors import CLIError, display_error from .._compat import PYDANTIC_V2, ConfigDict, model_parse from .._models import BaseModel @@ -167,17 +166,17 @@ def _main() -> None: if args.verbosity != 0: sys.stderr.write("Warning: --verbosity isn't supported yet\n") - proxies: ProxiesDict = {} + proxies: dict[str, httpx.BaseTransport] = {} if args.proxy is not None: for proxy in args.proxy: key = "https://" if proxy.startswith("https") else "http://" if key in proxies: raise CLIError(f"Multiple {key} proxies given - only the last one would be used") - proxies[key] = proxy + proxies[key] = httpx.HTTPTransport(proxy=httpx.Proxy(httpx.URL(proxy))) http_client = httpx.Client( - proxies=proxies or None, + mounts=proxies or None, http2=can_use_http2(), ) openai.http_client = http_client diff --git a/src/openai/cli/_errors.py b/src/openai/cli/_errors.py index 2bf06070d6..7d0292dab2 100644 --- a/src/openai/cli/_errors.py +++ b/src/openai/cli/_errors.py @@ -8,12 +8,10 @@ from .._exceptions import APIError, OpenAIError -class CLIError(OpenAIError): - ... +class CLIError(OpenAIError): ... -class SilentCLIError(CLIError): - ... +class SilentCLIError(CLIError): ... def display_error(err: CLIError | APIError | pydantic.ValidationError) -> None: diff --git a/src/openai/cli/_progress.py b/src/openai/cli/_progress.py index 390aaa9dfe..8a7f2525de 100644 --- a/src/openai/cli/_progress.py +++ b/src/openai/cli/_progress.py @@ -35,7 +35,7 @@ def read(self, n: int | None = -1) -> bytes: try: self._callback(self._progress) except Exception as e: # catches exception from the callback - raise CancelledError("The upload was cancelled: {}".format(e)) + raise CancelledError("The upload was cancelled: {}".format(e)) from e return chunk diff --git a/src/openai/cli/_tools/migrate.py b/src/openai/cli/_tools/migrate.py index 714bead8e3..841b777528 100644 --- a/src/openai/cli/_tools/migrate.py +++ b/src/openai/cli/_tools/migrate.py @@ -2,7 +2,6 @@ import os import sys -import json import shutil import tarfile import platform @@ -41,7 +40,7 @@ def grit(args: GritArgs) -> None: except subprocess.CalledProcessError: # stdout and stderr are forwarded by subprocess so an error will already # have been displayed - raise SilentCLIError() + raise SilentCLIError() from None class MigrateArgs(BaseModel): @@ -57,7 +56,7 @@ def migrate(args: MigrateArgs) -> None: except subprocess.CalledProcessError: # stdout and stderr are forwarded by subprocess so an error will already # have been displayed - raise SilentCLIError() + raise SilentCLIError() from None # handles downloading the Grit CLI until they provide their own PyPi package @@ -85,14 +84,16 @@ def install() -> Path: if sys.platform == "win32": raise CLIError("Windows is not supported yet in the migration CLI") - platform = "macos" if sys.platform == "darwin" else "linux" + _debug("Using Grit installer from GitHub") + + platform = "apple-darwin" if sys.platform == "darwin" else "unknown-linux-gnu" dir_name = _cache_dir() / "openai-python" install_dir = dir_name / ".install" target_dir = install_dir / "bin" - target_path = target_dir / "marzano" - temp_file = target_dir / "marzano.tmp" + target_path = target_dir / "grit" + temp_file = target_dir / "grit.tmp" if target_path.exists(): _debug(f"{target_path} already exists") @@ -109,27 +110,14 @@ def install() -> Path: arch = _get_arch() _debug(f"Using architecture {arch}") - file_name = 
f"marzano-{platform}-{arch}" - meta_url = f"https://api.keygen.sh/v1/accounts/%7BKEYGEN_ACCOUNT%7D/artifacts/%7Bfile_name%7D" + file_name = f"grit-{arch}-{platform}" + download_url = f"https://github.com/getgrit/gritql/releases/latest/download/%7Bfile_name%7D.tar.gz" - sys.stdout.write(f"Retrieving Grit CLI metadata from {meta_url}\n") + sys.stdout.write(f"Downloading Grit CLI from {download_url}\n") with httpx.Client() as client: - response = client.get(meta_url) # pyright: ignore[reportUnknownMemberType] - - data = response.json() - errors = data.get("errors") - if errors: - for error in errors: - sys.stdout.write(f"{error}\n") - - raise CLIError("Could not locate Grit CLI binary - see above errors") - - write_manifest(install_dir, data["data"]["relationships"]["release"]["data"]["id"]) - - link = data["data"]["links"]["redirect"] - _debug(f"Redirect URL {link}") - - download_response = client.get(link) # pyright: ignore[reportUnknownMemberType] + download_response = client.get(download_url, follow_redirects=True) + if download_response.status_code != 200: + raise CLIError(f"Failed to download Grit CLI from {download_url}") with open(temp_file, "wb") as file: for chunk in download_response.iter_bytes(): file.write(chunk) @@ -138,10 +126,12 @@ def install() -> Path: unpacked_dir.mkdir(parents=True, exist_ok=True) with tarfile.open(temp_file, "r:gz") as archive: - archive.extractall(unpacked_dir) + if sys.version_info >= (3, 12): + archive.extractall(unpacked_dir, filter="data") + else: + archive.extractall(unpacked_dir) - for item in unpacked_dir.iterdir(): - item.rename(target_dir / item.name) + _move_files_recursively(unpacked_dir, target_dir) shutil.rmtree(unpacked_dir) os.remove(temp_file) @@ -152,30 +142,23 @@ def install() -> Path: return target_path +def _move_files_recursively(source_dir: Path, target_dir: Path) -> None: + for item in source_dir.iterdir(): + if item.is_file(): + item.rename(target_dir / item.name) + elif item.is_dir(): + _move_files_recursively(item, target_dir) + + def _get_arch() -> str: architecture = platform.machine().lower() - # Map the architecture names to Node.js equivalents + # Map the architecture names to Grit equivalents arch_map = { - "x86_64": "x64", - "amd64": "x64", - "armv7l": "arm", - "aarch64": "arm64", + "x86_64": "x86_64", + "amd64": "x86_64", + "armv7l": "aarch64", + "arm64": "aarch64", } return arch_map.get(architecture, architecture) - - -def write_manifest(install_path: Path, release: str) -> None: - manifest = { - "installPath": str(install_path), - "binaries": { - "marzano": { - "name": "marzano", - "release": release, - }, - }, - } - manifest_path = Path(install_path) / "manifests.json" - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) diff --git a/src/openai/helpers/__init__.py b/src/openai/helpers/__init__.py new file mode 100644 index 0000000000..ab3044da59 --- /dev/null +++ b/src/openai/helpers/__init__.py @@ -0,0 +1,4 @@ +from .microphone import Microphone +from .local_audio_player import LocalAudioPlayer + +__all__ = ["Microphone", "LocalAudioPlayer"] diff --git a/src/openai/helpers/local_audio_player.py b/src/openai/helpers/local_audio_player.py new file mode 100644 index 0000000000..eed68aa21d --- /dev/null +++ b/src/openai/helpers/local_audio_player.py @@ -0,0 +1,165 @@ +# mypy: ignore-errors +from __future__ import annotations + +import queue +import asyncio +from typing import Any, Union, Callable, AsyncGenerator, cast +from typing_extensions import TYPE_CHECKING + +from .. 
import _legacy_response +from .._extras import numpy as np, sounddevice as sd +from .._response import StreamedBinaryAPIResponse, AsyncStreamedBinaryAPIResponse + +if TYPE_CHECKING: + import numpy.typing as npt + +SAMPLE_RATE = 24000 + + +class LocalAudioPlayer: + def __init__( + self, + should_stop: Union[Callable[[], bool], None] = None, + ): + self.channels = 1 + self.dtype = np.float32 + self.should_stop = should_stop + + async def _tts_response_to_buffer( + self, + response: Union[ + _legacy_response.HttpxBinaryResponseContent, + AsyncStreamedBinaryAPIResponse, + StreamedBinaryAPIResponse, + ], + ) -> npt.NDArray[np.float32]: + chunks: list[bytes] = [] + if isinstance(response, _legacy_response.HttpxBinaryResponseContent) or isinstance( + response, StreamedBinaryAPIResponse + ): + for chunk in response.iter_bytes(chunk_size=1024): + if chunk: + chunks.append(chunk) + else: + async for chunk in response.iter_bytes(chunk_size=1024): + if chunk: + chunks.append(chunk) + + audio_bytes = b"".join(chunks) + audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32767.0 + audio_np = audio_np.reshape(-1, 1) + return audio_np + + async def play( + self, + input: Union[ + npt.NDArray[np.int16], + npt.NDArray[np.float32], + _legacy_response.HttpxBinaryResponseContent, + AsyncStreamedBinaryAPIResponse, + StreamedBinaryAPIResponse, + ], + ) -> None: + audio_content: npt.NDArray[np.float32] + if isinstance(input, np.ndarray): + if input.dtype == np.int16 and self.dtype == np.float32: + audio_content = (input.astype(np.float32) / 32767.0).reshape(-1, self.channels) + elif input.dtype == np.float32: + audio_content = cast('npt.NDArray[np.float32]', input) + else: + raise ValueError(f"Unsupported dtype: {input.dtype}") + else: + audio_content = await self._tts_response_to_buffer(input) + + loop = asyncio.get_event_loop() + event = asyncio.Event() + idx = 0 + + def callback( + outdata: npt.NDArray[np.float32], + frame_count: int, + _time_info: Any, + _status: Any, + ): + nonlocal idx + + remainder = len(audio_content) - idx + if remainder == 0 or (callable(self.should_stop) and self.should_stop()): + loop.call_soon_threadsafe(event.set) + raise sd.CallbackStop + valid_frames = frame_count if remainder >= frame_count else remainder + outdata[:valid_frames] = audio_content[idx : idx + valid_frames] + outdata[valid_frames:] = 0 + idx += valid_frames + + stream = sd.OutputStream( + samplerate=SAMPLE_RATE, + callback=callback, + dtype=audio_content.dtype, + channels=audio_content.shape[1], + ) + with stream: + await event.wait() + + async def play_stream( + self, + buffer_stream: AsyncGenerator[Union[npt.NDArray[np.float32], npt.NDArray[np.int16], None], None], + ) -> None: + loop = asyncio.get_event_loop() + event = asyncio.Event() + buffer_queue: queue.Queue[Union[npt.NDArray[np.float32], npt.NDArray[np.int16], None]] = queue.Queue(maxsize=50) + + async def buffer_producer(): + async for buffer in buffer_stream: + if buffer is None: + break + await loop.run_in_executor(None, buffer_queue.put, buffer) + await loop.run_in_executor(None, buffer_queue.put, None) # Signal completion + + def callback( + outdata: npt.NDArray[np.float32], + frame_count: int, + _time_info: Any, + _status: Any, + ): + nonlocal current_buffer, buffer_pos + + frames_written = 0 + while frames_written < frame_count: + if current_buffer is None or buffer_pos >= len(current_buffer): + try: + current_buffer = buffer_queue.get(timeout=0.1) + if current_buffer is None: + loop.call_soon_threadsafe(event.set) + raise 
sd.CallbackStop + buffer_pos = 0 + + if current_buffer.dtype == np.int16 and self.dtype == np.float32: + current_buffer = (current_buffer.astype(np.float32) / 32767.0).reshape(-1, self.channels) + + except queue.Empty: + outdata[frames_written:] = 0 + return + + remaining_frames = len(current_buffer) - buffer_pos + frames_to_write = min(frame_count - frames_written, remaining_frames) + outdata[frames_written : frames_written + frames_to_write] = current_buffer[ + buffer_pos : buffer_pos + frames_to_write + ] + buffer_pos += frames_to_write + frames_written += frames_to_write + + current_buffer = None + buffer_pos = 0 + + producer_task = asyncio.create_task(buffer_producer()) + + with sd.OutputStream( + samplerate=SAMPLE_RATE, + channels=self.channels, + dtype=self.dtype, + callback=callback, + ): + await event.wait() + + await producer_task diff --git a/src/openai/helpers/microphone.py b/src/openai/helpers/microphone.py new file mode 100644 index 0000000000..62a6d8d8a9 --- /dev/null +++ b/src/openai/helpers/microphone.py @@ -0,0 +1,100 @@ +# mypy: ignore-errors +from __future__ import annotations + +import io +import time +import wave +import asyncio +from typing import Any, Type, Union, Generic, TypeVar, Callable, overload +from typing_extensions import TYPE_CHECKING, Literal + +from .._types import FileTypes, FileContent +from .._extras import numpy as np, sounddevice as sd + +if TYPE_CHECKING: + import numpy.typing as npt + +SAMPLE_RATE = 24000 + +DType = TypeVar("DType", bound=np.generic) + + +class Microphone(Generic[DType]): + def __init__( + self, + channels: int = 1, + dtype: Type[DType] = np.int16, + should_record: Union[Callable[[], bool], None] = None, + timeout: Union[float, None] = None, + ): + self.channels = channels + self.dtype = dtype + self.should_record = should_record + self.buffer_chunks = [] + self.timeout = timeout + self.has_record_function = callable(should_record) + + def _ndarray_to_wav(self, audio_data: npt.NDArray[DType]) -> FileTypes: + buffer: FileContent = io.BytesIO() + with wave.open(buffer, "w") as wav_file: + wav_file.setnchannels(self.channels) + wav_file.setsampwidth(np.dtype(self.dtype).itemsize) + wav_file.setframerate(SAMPLE_RATE) + wav_file.writeframes(audio_data.tobytes()) + buffer.seek(0) + return ("audio.wav", buffer, "audio/wav") + + @overload + async def record(self, return_ndarray: Literal[True]) -> npt.NDArray[DType]: ... + + @overload + async def record(self, return_ndarray: Literal[False]) -> FileTypes: ... + + @overload + async def record(self, return_ndarray: None = ...) -> FileTypes: ... 
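A hypothetical call pattern for the audio helpers added above; it needs the optional `numpy` and `sounddevice` dependencies plus real audio hardware, so treat it as an illustration of the intended usage rather than something guaranteed to run everywhere.

```py
import asyncio

from openai.helpers import LocalAudioPlayer, Microphone


async def main() -> None:
    mic = Microphone(timeout=5.0)  # stop recording after at most 5 seconds
    wav_file = await mic.record()  # ("audio.wav", BytesIO, "audio/wav") by default
    print(wav_file[0])

    player = LocalAudioPlayer()
    # a streamed TTS response (or a raw numpy buffer) could then be played back:
    # await player.play(tts_response)


asyncio.run(main())
```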
+ + async def record(self, return_ndarray: Union[bool, None] = False) -> Union[npt.NDArray[DType], FileTypes]: + loop = asyncio.get_event_loop() + event = asyncio.Event() + self.buffer_chunks: list[npt.NDArray[DType]] = [] + start_time = time.perf_counter() + + def callback( + indata: npt.NDArray[DType], + _frame_count: int, + _time_info: Any, + _status: Any, + ): + execution_time = time.perf_counter() - start_time + reached_recording_timeout = execution_time > self.timeout if self.timeout is not None else False + if reached_recording_timeout: + loop.call_soon_threadsafe(event.set) + raise sd.CallbackStop + + should_be_recording = self.should_record() if callable(self.should_record) else True + if not should_be_recording: + loop.call_soon_threadsafe(event.set) + raise sd.CallbackStop + + self.buffer_chunks.append(indata.copy()) + + stream = sd.InputStream( + callback=callback, + dtype=self.dtype, + samplerate=SAMPLE_RATE, + channels=self.channels, + ) + with stream: + await event.wait() + + # Concatenate all chunks into a single buffer, handle empty case + concatenated_chunks: npt.NDArray[DType] = ( + np.concatenate(self.buffer_chunks, axis=0) + if len(self.buffer_chunks) > 0 + else np.array([], dtype=self.dtype) + ) + + if return_ndarray: + return concatenated_chunks + else: + return self._ndarray_to_wav(concatenated_chunks) diff --git a/src/openai/lib/.keep b/src/openai/lib/.keep new file mode 100644 index 0000000000..5e2c99fdbe --- /dev/null +++ b/src/openai/lib/.keep @@ -0,0 +1,4 @@ +File generated from our OpenAPI spec by Stainless. + +This directory can be used to store custom files to expand the SDK. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. \ No newline at end of file diff --git a/src/openai/lib/__init__.py b/src/openai/lib/__init__.py new file mode 100644 index 0000000000..5c6cb782c0 --- /dev/null +++ b/src/openai/lib/__init__.py @@ -0,0 +1,2 @@ +from ._tools import pydantic_function_tool as pydantic_function_tool +from ._parsing import ResponseFormatT as ResponseFormatT diff --git a/src/openai/lib/_old_api.py b/src/openai/lib/_old_api.py index c4038fcfaf..929c87e80b 100644 --- a/src/openai/lib/_old_api.py +++ b/src/openai/lib/_old_api.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from typing_extensions import override from .._utils import LazyProxy @@ -23,13 +23,19 @@ def __init__(self, *, symbol: str) -> None: super().__init__(INSTRUCTIONS.format(symbol=symbol)) -class APIRemovedInV1Proxy(LazyProxy[None]): +class APIRemovedInV1Proxy(LazyProxy[Any]): def __init__(self, *, symbol: str) -> None: super().__init__() self._symbol = symbol @override - def __load__(self) -> None: + def __load__(self) -> Any: + # return the proxy until it is eventually called so that + # we don't break people that are just checking the attributes + # of a module + return self + + def __call__(self, *_args: Any, **_kwargs: Any) -> Any: raise APIRemovedInV1(symbol=self._symbol) diff --git a/src/openai/lib/_parsing/__init__.py b/src/openai/lib/_parsing/__init__.py new file mode 100644 index 0000000000..4d454c3a20 --- /dev/null +++ b/src/openai/lib/_parsing/__init__.py @@ -0,0 +1,12 @@ +from ._completions import ( + ResponseFormatT as ResponseFormatT, + has_parseable_input, + has_parseable_input as has_parseable_input, + maybe_parse_content as maybe_parse_content, + validate_input_tools as validate_input_tools, + parse_chat_completion as parse_chat_completion, + 
get_input_tool_by_name as get_input_tool_by_name, + solve_response_format_t as solve_response_format_t, + parse_function_tool_arguments as parse_function_tool_arguments, + type_to_response_format_param as type_to_response_format_param, +) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py new file mode 100644 index 0000000000..c160070b66 --- /dev/null +++ b/src/openai/lib/_parsing/_completions.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Any, Iterable, cast +from typing_extensions import TypeVar, TypeGuard, assert_never + +import pydantic + +from .._tools import PydanticFunctionTool +from ..._types import NOT_GIVEN, NotGiven +from ..._utils import is_dict, is_given +from ..._compat import PYDANTIC_V2, model_parse_json +from ..._models import construct_type_unchecked +from .._pydantic import is_basemodel_type, to_strict_json_schema, is_dataclass_like_type +from ...types.chat import ( + ParsedChoice, + ChatCompletion, + ParsedFunction, + ParsedChatCompletion, + ChatCompletionMessage, + ParsedFunctionToolCall, + ChatCompletionToolParam, + ParsedChatCompletionMessage, + completion_create_params, +) +from ..._exceptions import LengthFinishReasonError, ContentFilterFinishReasonError +from ...types.shared_params import FunctionDefinition +from ...types.chat.completion_create_params import ResponseFormat as ResponseFormatParam +from ...types.chat.chat_completion_message_tool_call import Function + +ResponseFormatT = TypeVar( + "ResponseFormatT", + # if it isn't given then we don't do any parsing + default=None, +) +_default_response_format: None = None + + +def validate_input_tools( + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, +) -> None: + if not is_given(tools): + return + + for tool in tools: + if tool["type"] != "function": + raise ValueError( + f"Currently only `function` tool types support auto-parsing; Received `{tool['type']}`", + ) + + strict = tool["function"].get("strict") + if strict is not True: + raise ValueError( + f"`{tool['function']['name']}` is not strict. 
Only `strict` function tools can be auto-parsed" + ) + + +def parse_chat_completion( + *, + response_format: type[ResponseFormatT] | completion_create_params.ResponseFormat | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven, + chat_completion: ChatCompletion | ParsedChatCompletion[object], +) -> ParsedChatCompletion[ResponseFormatT]: + if is_given(input_tools): + input_tools = [t for t in input_tools] + else: + input_tools = [] + + choices: list[ParsedChoice[ResponseFormatT]] = [] + for choice in chat_completion.choices: + if choice.finish_reason == "length": + raise LengthFinishReasonError(completion=chat_completion) + + if choice.finish_reason == "content_filter": + raise ContentFilterFinishReasonError() + + message = choice.message + + tool_calls: list[ParsedFunctionToolCall] = [] + if message.tool_calls: + for tool_call in message.tool_calls: + if tool_call.type == "function": + tool_call_dict = tool_call.to_dict() + tool_calls.append( + construct_type_unchecked( + value={ + **tool_call_dict, + "function": { + **cast(Any, tool_call_dict["function"]), + "parsed_arguments": parse_function_tool_arguments( + input_tools=input_tools, function=tool_call.function + ), + }, + }, + type_=ParsedFunctionToolCall, + ) + ) + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(tool_call) + else: + tool_calls.append(tool_call) + + choices.append( + construct_type_unchecked( + type_=cast(Any, ParsedChoice)[solve_response_format_t(response_format)], + value={ + **choice.to_dict(), + "message": { + **message.to_dict(), + "parsed": maybe_parse_content( + response_format=response_format, + message=message, + ), + "tool_calls": tool_calls if tool_calls else None, + }, + }, + ) + ) + + return cast( + ParsedChatCompletion[ResponseFormatT], + construct_type_unchecked( + type_=cast(Any, ParsedChatCompletion)[solve_response_format_t(response_format)], + value={ + **chat_completion.to_dict(), + "choices": choices, + }, + ), + ) + + +def get_input_tool_by_name(*, input_tools: list[ChatCompletionToolParam], name: str) -> ChatCompletionToolParam | None: + return next((t for t in input_tools if t.get("function", {}).get("name") == name), None) + + +def parse_function_tool_arguments( + *, input_tools: list[ChatCompletionToolParam], function: Function | ParsedFunction +) -> object: + input_tool = get_input_tool_by_name(input_tools=input_tools, name=function.name) + if not input_tool: + return None + + input_fn = cast(object, input_tool.get("function")) + if isinstance(input_fn, PydanticFunctionTool): + return model_parse_json(input_fn.model, function.arguments) + + input_fn = cast(FunctionDefinition, input_fn) + + if not input_fn.get("strict"): + return None + + return json.loads(function.arguments) + + +def maybe_parse_content( + *, + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + message: ChatCompletionMessage | ParsedChatCompletionMessage[object], +) -> ResponseFormatT | None: + if has_rich_response_format(response_format) and message.content and not message.refusal: + return _parse_content(response_format, message.content) + + return None + + +def solve_response_format_t( + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, +) -> type[ResponseFormatT]: + """Return the runtime type for the given response format. + + If no response format is given, or if we won't auto-parse the response format + then we default to `None`. 
+ """ + if has_rich_response_format(response_format): + return response_format + + return cast("type[ResponseFormatT]", _default_response_format) + + +def has_parseable_input( + *, + response_format: type | ResponseFormatParam | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, +) -> bool: + if has_rich_response_format(response_format): + return True + + for input_tool in input_tools or []: + if is_parseable_tool(input_tool): + return True + + return False + + +def has_rich_response_format( + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, +) -> TypeGuard[type[ResponseFormatT]]: + if not is_given(response_format): + return False + + if is_response_format_param(response_format): + return False + + return True + + +def is_response_format_param(response_format: object) -> TypeGuard[ResponseFormatParam]: + return is_dict(response_format) + + +def is_parseable_tool(input_tool: ChatCompletionToolParam) -> bool: + input_fn = cast(object, input_tool.get("function")) + if isinstance(input_fn, PydanticFunctionTool): + return True + + return cast(FunctionDefinition, input_fn).get("strict") or False + + +def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT: + if is_basemodel_type(response_format): + return cast(ResponseFormatT, model_parse_json(response_format, content)) + + if is_dataclass_like_type(response_format): + if not PYDANTIC_V2: + raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {response_format}") + + return pydantic.TypeAdapter(response_format).validate_json(content) + + raise TypeError(f"Unable to automatically parse response format type {response_format}") + + +def type_to_response_format_param( + response_format: type | completion_create_params.ResponseFormat | NotGiven, +) -> ResponseFormatParam | NotGiven: + if not is_given(response_format): + return NOT_GIVEN + + if is_response_format_param(response_format): + return response_format + + # type checkers don't narrow the negation of a `TypeGuard` as it isn't + # a safe default behaviour but we know that at this point the `response_format` + # can only be a `type` + response_format = cast(type, response_format) + + json_schema_type: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any] | None = None + + if is_basemodel_type(response_format): + name = response_format.__name__ + json_schema_type = response_format + elif is_dataclass_like_type(response_format): + name = response_format.__name__ + json_schema_type = pydantic.TypeAdapter(response_format) + else: + raise TypeError(f"Unsupported response_format type - {response_format}") + + return { + "type": "json_schema", + "json_schema": { + "schema": to_strict_json_schema(json_schema_type), + "name": name, + "strict": True, + }, + } diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py new file mode 100644 index 0000000000..a189dcf937 --- /dev/null +++ b/src/openai/lib/_parsing/_responses.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Any, List, Iterable, cast +from typing_extensions import TypeVar, assert_never + +import pydantic + +from .._tools import ResponsesPydanticFunctionTool +from ..._types import NotGiven +from ..._utils import is_given +from ..._compat import PYDANTIC_V2, model_parse_json +from ..._models import construct_type_unchecked +from .._pydantic import is_basemodel_type, is_dataclass_like_type +from ._completions import solve_response_format_t, 
type_to_response_format_param +from ...types.responses import ( + Response, + ToolParam, + ParsedContent, + ParsedResponse, + FunctionToolParam, + ParsedResponseOutputItem, + ParsedResponseOutputText, + ResponseFunctionToolCall, + ParsedResponseOutputMessage, + ResponseFormatTextConfigParam, + ParsedResponseFunctionToolCall, +) +from ...types.chat.completion_create_params import ResponseFormat + +TextFormatT = TypeVar( + "TextFormatT", + # if it isn't given then we don't do any parsing + default=None, +) + + +def type_to_text_format_param(type_: type) -> ResponseFormatTextConfigParam: + response_format_dict = type_to_response_format_param(type_) + assert is_given(response_format_dict) + response_format_dict = cast(ResponseFormat, response_format_dict) # pyright: ignore[reportUnnecessaryCast] + assert response_format_dict["type"] == "json_schema" + assert "schema" in response_format_dict["json_schema"] + + return { + "type": "json_schema", + "strict": True, + "name": response_format_dict["json_schema"]["name"], + "schema": response_format_dict["json_schema"]["schema"], + } + + +def parse_response( + *, + text_format: type[TextFormatT] | NotGiven, + input_tools: Iterable[ToolParam] | NotGiven | None, + response: Response | ParsedResponse[object], +) -> ParsedResponse[TextFormatT]: + solved_t = solve_response_format_t(text_format) + output_list: List[ParsedResponseOutputItem[TextFormatT]] = [] + + for output in response.output: + if output.type == "message": + content_list: List[ParsedContent[TextFormatT]] = [] + for item in output.content: + if item.type != "output_text": + content_list.append(item) + continue + + content_list.append( + construct_type_unchecked( + type_=cast(Any, ParsedResponseOutputText)[solved_t], + value={ + **item.to_dict(), + "parsed": parse_text(item.text, text_format=text_format), + }, + ) + ) + + output_list.append( + construct_type_unchecked( + type_=cast(Any, ParsedResponseOutputMessage)[solved_t], + value={ + **output.to_dict(), + "content": content_list, + }, + ) + ) + elif output.type == "function_call": + output_list.append( + construct_type_unchecked( + type_=ParsedResponseFunctionToolCall, + value={ + **output.to_dict(), + "parsed_arguments": parse_function_tool_arguments( + input_tools=input_tools, function_call=output + ), + }, + ) + ) + elif ( + output.type == "computer_call" + or output.type == "file_search_call" + or output.type == "web_search_call" + or output.type == "reasoning" + ): + output_list.append(output) + elif TYPE_CHECKING: # type: ignore + assert_never(output) + else: + output_list.append(output) + + return cast( + ParsedResponse[TextFormatT], + construct_type_unchecked( + type_=cast(Any, ParsedResponse)[solved_t], + value={ + **response.to_dict(), + "output": output_list, + }, + ), + ) + + +def parse_text(text: str, text_format: type[TextFormatT] | NotGiven) -> TextFormatT | None: + if not is_given(text_format): + return None + + if is_basemodel_type(text_format): + return cast(TextFormatT, model_parse_json(text_format, text)) + + if is_dataclass_like_type(text_format): + if not PYDANTIC_V2: + raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {text_format}") + + return pydantic.TypeAdapter(text_format).validate_json(text) + + raise TypeError(f"Unable to automatically parse response format type {text_format}") + + +def get_input_tool_by_name(*, input_tools: Iterable[ToolParam], name: str) -> FunctionToolParam | None: + for tool in input_tools: + if tool["type"] == "function" and tool.get("name") == name: + return 
tool + + return None + + +def parse_function_tool_arguments( + *, + input_tools: Iterable[ToolParam] | NotGiven | None, + function_call: ParsedResponseFunctionToolCall | ResponseFunctionToolCall, +) -> object: + if input_tools is None or not is_given(input_tools): + return None + + input_tool = get_input_tool_by_name(input_tools=input_tools, name=function_call.name) + if not input_tool: + return None + + tool = cast(object, input_tool) + if isinstance(tool, ResponsesPydanticFunctionTool): + return model_parse_json(tool.model, function_call.arguments) + + if not input_tool.get("strict"): + return None + + return json.loads(function_call.arguments) diff --git a/src/openai/lib/_pydantic.py b/src/openai/lib/_pydantic.py new file mode 100644 index 0000000000..c2d73e5fc6 --- /dev/null +++ b/src/openai/lib/_pydantic.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +import inspect +from typing import Any, TypeVar +from typing_extensions import TypeGuard + +import pydantic + +from .._types import NOT_GIVEN +from .._utils import is_dict as _is_dict, is_list +from .._compat import PYDANTIC_V2, model_json_schema + +_T = TypeVar("_T") + + +def to_strict_json_schema(model: type[pydantic.BaseModel] | pydantic.TypeAdapter[Any]) -> dict[str, Any]: + if inspect.isclass(model) and is_basemodel_type(model): + schema = model_json_schema(model) + elif PYDANTIC_V2 and isinstance(model, pydantic.TypeAdapter): + schema = model.json_schema() + else: + raise TypeError(f"Non BaseModel types are only supported with Pydantic v2 - {model}") + + return _ensure_strict_json_schema(schema, path=(), root=schema) + + +def _ensure_strict_json_schema( + json_schema: object, + *, + path: tuple[str, ...], + root: dict[str, object], +) -> dict[str, Any]: + """Mutates the given JSON schema to ensure it conforms to the `strict` standard + that the API expects. 
+ """ + if not is_dict(json_schema): + raise TypeError(f"Expected {json_schema} to be a dictionary; path={path}") + + defs = json_schema.get("$defs") + if is_dict(defs): + for def_name, def_schema in defs.items(): + _ensure_strict_json_schema(def_schema, path=(*path, "$defs", def_name), root=root) + + definitions = json_schema.get("definitions") + if is_dict(definitions): + for definition_name, definition_schema in definitions.items(): + _ensure_strict_json_schema(definition_schema, path=(*path, "definitions", definition_name), root=root) + + typ = json_schema.get("type") + if typ == "object" and "additionalProperties" not in json_schema: + json_schema["additionalProperties"] = False + + # object types + # { 'type': 'object', 'properties': { 'a': {...} } } + properties = json_schema.get("properties") + if is_dict(properties): + json_schema["required"] = [prop for prop in properties.keys()] + json_schema["properties"] = { + key: _ensure_strict_json_schema(prop_schema, path=(*path, "properties", key), root=root) + for key, prop_schema in properties.items() + } + + # arrays + # { 'type': 'array', 'items': {...} } + items = json_schema.get("items") + if is_dict(items): + json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root) + + # unions + any_of = json_schema.get("anyOf") + if is_list(any_of): + json_schema["anyOf"] = [ + _ensure_strict_json_schema(variant, path=(*path, "anyOf", str(i)), root=root) + for i, variant in enumerate(any_of) + ] + + # intersections + all_of = json_schema.get("allOf") + if is_list(all_of): + if len(all_of) == 1: + json_schema.update(_ensure_strict_json_schema(all_of[0], path=(*path, "allOf", "0"), root=root)) + json_schema.pop("allOf") + else: + json_schema["allOf"] = [ + _ensure_strict_json_schema(entry, path=(*path, "allOf", str(i)), root=root) + for i, entry in enumerate(all_of) + ] + + # strip `None` defaults as there's no meaningful distinction here + # the schema will still be `nullable` and the model will default + # to using `None` anyway + if json_schema.get("default", NOT_GIVEN) is None: + json_schema.pop("default") + + # we can't use `$ref`s if there are also other properties defined, e.g. + # `{"$ref": "...", "description": "my description"}` + # + # so we unravel the ref + # `{"type": "string", "description": "my description"}` + ref = json_schema.get("$ref") + if ref and has_more_than_n_keys(json_schema, 1): + assert isinstance(ref, str), f"Received non-string $ref - {ref}" + + resolved = resolve_ref(root=root, ref=ref) + if not is_dict(resolved): + raise ValueError(f"Expected `$ref: {ref}` to resolved to a dictionary but got {resolved}") + + # properties from the json schema take priority over the ones on the `$ref` + json_schema.update({**resolved, **json_schema}) + json_schema.pop("$ref") + # Since the schema expanded from `$ref` might not have `additionalProperties: false` applied, + # we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid. 
+ return _ensure_strict_json_schema(json_schema, path=path, root=root) + + return json_schema + + +def resolve_ref(*, root: dict[str, object], ref: str) -> object: + if not ref.startswith("#/"): + raise ValueError(f"Unexpected $ref format {ref!r}; Does not start with #/") + + path = ref[2:].split("/") + resolved = root + for key in path: + value = resolved[key] + assert is_dict(value), f"encountered non-dictionary entry while resolving {ref} - {resolved}" + resolved = value + + return resolved + + +def is_basemodel_type(typ: type) -> TypeGuard[type[pydantic.BaseModel]]: + if not inspect.isclass(typ): + return False + return issubclass(typ, pydantic.BaseModel) + + +def is_dataclass_like_type(typ: type) -> bool: + """Returns True if the given type likely used `@pydantic.dataclass`""" + return hasattr(typ, "__pydantic_config__") + + +def is_dict(obj: object) -> TypeGuard[dict[str, object]]: + # just pretend that we know there are only `str` keys + # as that check is not worth the performance cost + return _is_dict(obj) + + +def has_more_than_n_keys(obj: dict[str, object], n: int) -> bool: + i = 0 + for _ in obj.keys(): + i += 1 + if i > n: + return True + return False diff --git a/src/openai/lib/_tools.py b/src/openai/lib/_tools.py new file mode 100644 index 0000000000..415d750074 --- /dev/null +++ b/src/openai/lib/_tools.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import Any, Dict, cast + +import pydantic + +from ._pydantic import to_strict_json_schema +from ..types.chat import ChatCompletionToolParam +from ..types.shared_params import FunctionDefinition +from ..types.responses.function_tool_param import FunctionToolParam as ResponsesFunctionToolParam + + +class PydanticFunctionTool(Dict[str, Any]): + """Dictionary wrapper so we can pass the given base model + throughout the entire request stack without having to special + case it. 
+ """ + + model: type[pydantic.BaseModel] + + def __init__(self, defn: FunctionDefinition, model: type[pydantic.BaseModel]) -> None: + super().__init__(defn) + self.model = model + + def cast(self) -> FunctionDefinition: + return cast(FunctionDefinition, self) + + +class ResponsesPydanticFunctionTool(Dict[str, Any]): + model: type[pydantic.BaseModel] + + def __init__(self, tool: ResponsesFunctionToolParam, model: type[pydantic.BaseModel]) -> None: + super().__init__(tool) + self.model = model + + def cast(self) -> ResponsesFunctionToolParam: + return cast(ResponsesFunctionToolParam, self) + + +def pydantic_function_tool( + model: type[pydantic.BaseModel], + *, + name: str | None = None, # inferred from class name by default + description: str | None = None, # inferred from class docstring by default +) -> ChatCompletionToolParam: + if description is None: + # note: we intentionally don't use `.getdoc()` to avoid + # including pydantic's docstrings + description = model.__doc__ + + function = PydanticFunctionTool( + { + "name": name or model.__name__, + "strict": True, + "parameters": to_strict_json_schema(model), + }, + model, + ).cast() + + if description is not None: + function["description"] = description + + return { + "type": "function", + "function": function, + } diff --git a/src/openai/lib/_validators.py b/src/openai/lib/_validators.py index c8608c0cef..cf24cd2294 100644 --- a/src/openai/lib/_validators.py +++ b/src/openai/lib/_validators.py @@ -312,7 +312,7 @@ def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any: x["completion"] = x["completion"].str[len(prefix) :] if ws_prefix: # keep the single whitespace as prefix - x["completion"] = " " + x["completion"] + x["completion"] = f" {x['completion']}" return x if (df.completion == common_prefix).all(): @@ -623,9 +623,7 @@ def get_outfnames(fname: str, split: bool) -> list[str]: i = 0 while True: index_suffix = f" ({i})" if i > 0 else "" - candidate_fnames = [ - os.path.splitext(fname)[0] + "_prepared" + suffix + index_suffix + ".jsonl" for suffix in suffixes - ] + candidate_fnames = [f"{os.path.splitext(fname)[0]}_prepared{suffix}{index_suffix}.jsonl" for suffix in suffixes] if not any(os.path.isfile(f) for f in candidate_fnames): return candidate_fnames i += 1 @@ -680,9 +678,11 @@ def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_ac df_train = df.sample(n=n_train, random_state=42) df_valid = df.drop(df_train.index) df_train[["prompt", "completion"]].to_json( # type: ignore - fnames[0], lines=True, orient="records", force_ascii=False + fnames[0], lines=True, orient="records", force_ascii=False, indent=None + ) + df_valid[["prompt", "completion"]].to_json( + fnames[1], lines=True, orient="records", force_ascii=False, indent=None ) - df_valid[["prompt", "completion"]].to_json(fnames[1], lines=True, orient="records", force_ascii=False) n_classes, pos_class = get_classification_hyperparams(df) additional_params += " --compute_classification_metrics" @@ -692,7 +692,9 @@ def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_ac additional_params += f" --classification_n_classes {n_classes}" else: assert len(fnames) == 1 - df[["prompt", "completion"]].to_json(fnames[0], lines=True, orient="records", force_ascii=False) + df[["prompt", "completion"]].to_json( + fnames[0], lines=True, orient="records", force_ascii=False, indent=None + ) # Add -v VALID_FILE if we split the file into train / valid files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames)) diff 
--git a/src/openai/lib/azure.py b/src/openai/lib/azure.py
index 27bebd8cab..ea7bd20d99 100644
--- a/src/openai/lib/azure.py
+++ b/src/openai/lib/azure.py
@@ -2,14 +2,15 @@
 
 import os
 import inspect
-from typing import Any, Union, Mapping, TypeVar, Callable, Awaitable, overload
+from typing import Any, Union, Mapping, TypeVar, Callable, Awaitable, cast, overload
 from typing_extensions import Self, override
 
 import httpx
 
-from .._types import NOT_GIVEN, Omit, Timeout, NotGiven
+from .._types import NOT_GIVEN, Omit, Query, Timeout, NotGiven
 from .._utils import is_given, is_mapping
 from .._client import OpenAI, AsyncOpenAI
+from .._compat import model_copy
 from .._models import FinalRequestOptions
 from .._streaming import Stream, AsyncStream
 from .._exceptions import OpenAIError
@@ -22,6 +23,7 @@
         "/embeddings",
         "/audio/transcriptions",
         "/audio/translations",
+        "/audio/speech",
         "/images/generations",
     ]
 )
@@ -47,17 +49,40 @@ def __init__(self) -> None:
 
 
 class BaseAzureClient(BaseClient[_HttpxClientT, _DefaultStreamT]):
+    _azure_endpoint: httpx.URL | None
+    _azure_deployment: str | None
+
     @override
     def _build_request(
         self,
         options: FinalRequestOptions,
+        *,
+        retries_taken: int = 0,
     ) -> httpx.Request:
         if options.url in _deployments_endpoints and is_mapping(options.json_data):
             model = options.json_data.get("model")
-            if model is not None and not "/deployments" in str(self.base_url):
+            if model is not None and "/deployments" not in str(self.base_url.path):
                 options.url = f"/deployments/{model}{options.url}"
 
-        return super()._build_request(options)
+        return super()._build_request(options, retries_taken=retries_taken)
+
+    @override
+    def _prepare_url(http://github.com/self,%20url:%20str) -> httpx.URL:
+        """Adjust the URL if the client was configured with an Azure endpoint + deployment
+        and the API feature being called is **not** a deployments-based endpoint
+        (i.e. requires /deployments/deployment-name in the URL path).
+        """
+        if self._azure_deployment and self._azure_endpoint and url not in _deployments_endpoints:
+            merge_url = httpx.URL(url)
+            if merge_url.is_relative_url:
+                merge_raw_path = (
+                    self._azure_endpoint.raw_path.rstrip(b"/") + b"/openai/" + merge_url.raw_path.lstrip(b"/")
+                )
+                return self._azure_endpoint.copy_with(raw_path=merge_raw_path)
+
+            return merge_url
+
+        return super()._prepare_url(http://github.com/url)
 
 
 class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
@@ -72,14 +97,14 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.Client | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     @overload
     def __init__(
@@ -91,14 +116,14 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.Client | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     @overload
     def __init__(
@@ -110,14 +135,14 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.Client | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     def __init__(
         self,
@@ -129,6 +154,8 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         base_url: str | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
@@ -142,6 +169,7 @@
         This automatically infers the following arguments from their corresponding environment variables if they are not provided:
         - `api_key` from `AZURE_OPENAI_API_KEY`
         - `organization` from `OPENAI_ORG_ID`
+        - `project` from `OPENAI_PROJECT_ID`
         - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
         - `api_version` from `OPENAI_API_VERSION`
         - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
@@ -153,8 +181,8 @@
         azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
 
-        azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
-            Note: this means you won't be able to use non-deployment endpoints.
+        azure_deployment: A model deployment, if given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
+            Not supported with Assistants APIs.
         """
         if api_key is None:
             api_key = os.environ.get("AZURE_OPENAI_API_KEY")
@@ -190,9 +218,9 @@ def __init__(
                 )
 
             if azure_deployment is not None:
-                base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}"
+                base_url = f"{azure_endpoint.rstrip('/')}/openai/deployments/{azure_deployment}"
             else:
-                base_url = f"{azure_endpoint}/openai"
+                base_url = f"{azure_endpoint.rstrip('/')}/openai"
         else:
             if azure_endpoint is not None:
                 raise ValueError("base_url and azure_endpoint are mutually exclusive")
@@ -204,17 +232,21 @@
         super().__init__(
             api_key=api_key,
             organization=organization,
+            project=project,
             base_url=base_url,
             timeout=timeout,
             max_retries=max_retries,
             default_headers=default_headers,
             default_query=default_query,
             http_client=http_client,
+            websocket_base_url=websocket_base_url,
             _strict_response_validation=_strict_response_validation,
         )
         self._api_version = api_version
         self._azure_ad_token = azure_ad_token
         self._azure_ad_token_provider = azure_ad_token_provider
+        self._azure_deployment = azure_deployment if azure_endpoint else None
+        self._azure_endpoint = httpx.URL(azure_endpoint) if azure_endpoint else None
 
     @override
     def copy(
@@ -222,6 +254,8 @@ def copy(
         *,
         api_key: str | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         api_version: str | None = None,
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AzureADTokenProvider | None = None,
@@ -241,6 +275,8 @@ def copy(
         return super().copy(
             api_key=api_key,
             organization=organization,
+            project=project,
+            websocket_base_url=websocket_base_url,
             base_url=base_url,
             timeout=timeout,
             http_client=http_client,
@@ -275,8 +311,10 @@ def _get_azure_ad_token(self) -> str | None:
         return None
 
     @override
-    def _prepare_options(self, options: FinalRequestOptions) -> None:
+    def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
         headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
+
+        options = model_copy(options)
         options.headers = headers
 
         azure_ad_token = self._get_azure_ad_token()
@@ -290,7 +328,32 @@ def _prepare_options(self, options: FinalRequestOptions) -> None:
             # should never be hit
             raise ValueError("Unable to handle auth")
 
-        return super()._prepare_options(options)
+        return options
+
+    def _configure_realtime(self, model: str, extra_query: Query) -> tuple[httpx.URL, dict[str, str]]:
+        auth_headers = {}
+        query = {
+            **extra_query,
+            "api-version": self._api_version,
+            "deployment": self._azure_deployment or model,
+        }
+        if self.api_key != "":
+            auth_headers = {"api-key": self.api_key}
+        else:
+            token = self._get_azure_ad_token()
+            if token:
+                auth_headers = {"Authorization": f"Bearer {token}"}
+
+        if self.websocket_base_url is not None:
+            base_url = httpx.URL(self.websocket_base_url)
+            merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+            realtime_url = base_url.copy_with(raw_path=merge_raw_path)
+        else:
+            base_url = self._prepare_url("http://github.com/realtime")
+            realtime_url = base_url.copy_with(scheme="wss")
+
+        url = realtime_url.copy_with(params={**query})
+        return url, auth_headers
 
 
 class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], AsyncOpenAI):
@@ -305,14 +368,15 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.AsyncClient | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     @overload
     def __init__(
@@ -324,14 +388,15 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.AsyncClient | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     @overload
     def __init__(
@@ -343,14 +408,15 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
         default_query: Mapping[str, object] | None = None,
         http_client: httpx.AsyncClient | None = None,
         _strict_response_validation: bool = False,
-    ) -> None:
-        ...
+    ) -> None: ...
 
     def __init__(
         self,
@@ -362,7 +428,9 @@ def __init__(
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
         organization: str | None = None,
+        project: str | None = None,
         base_url: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
         max_retries: int = DEFAULT_MAX_RETRIES,
         default_headers: Mapping[str, str] | None = None,
@@ -375,6 +443,7 @@ def __init__(
         This automatically infers the following arguments from their corresponding environment variables if they are not provided:
         - `api_key` from `AZURE_OPENAI_API_KEY`
         - `organization` from `OPENAI_ORG_ID`
+        - `project` from `OPENAI_PROJECT_ID`
        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
         - `api_version` from `OPENAI_API_VERSION`
         - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
@@ -386,8 +455,8 @@ def __init__(
         azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
 
-        azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
-            Note: this means you won't be able to use non-deployment endpoints.
+        azure_deployment: A model deployment, if given with `azure_endpoint`, sets the base client URL to include `/deployments/{azure_deployment}`.
+            Not supported with Assistants APIs.
         """
         if api_key is None:
             api_key = os.environ.get("AZURE_OPENAI_API_KEY")
@@ -423,9 +492,9 @@ def __init__(
                 )
 
            if azure_deployment is not None:
-                base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}"
+                base_url = f"{azure_endpoint.rstrip('/')}/openai/deployments/{azure_deployment}"
             else:
-                base_url = f"{azure_endpoint}/openai"
+                base_url = f"{azure_endpoint.rstrip('/')}/openai"
         else:
             if azure_endpoint is not None:
                 raise ValueError("base_url and azure_endpoint are mutually exclusive")
@@ -437,17 +506,21 @@
         super().__init__(
             api_key=api_key,
             organization=organization,
+            project=project,
             base_url=base_url,
             timeout=timeout,
             max_retries=max_retries,
             default_headers=default_headers,
             default_query=default_query,
             http_client=http_client,
+            websocket_base_url=websocket_base_url,
             _strict_response_validation=_strict_response_validation,
         )
         self._api_version = api_version
         self._azure_ad_token = azure_ad_token
         self._azure_ad_token_provider = azure_ad_token_provider
+        self._azure_deployment = azure_deployment if azure_endpoint else None
+        self._azure_endpoint = httpx.URL(azure_endpoint) if azure_endpoint else None
 
     @override
     def copy(
@@ -455,6 +528,8 @@ def copy(
         *,
         api_key: str | None = None,
         organization: str | None = None,
+        project: str | None = None,
+        websocket_base_url: str | httpx.URL | None = None,
         api_version: str | None = None,
         azure_ad_token: str | None = None,
         azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
@@ -474,6 +549,8 @@ def copy(
         return super().copy(
             api_key=api_key,
             organization=organization,
+            project=project,
+            websocket_base_url=websocket_base_url,
             base_url=base_url,
             timeout=timeout,
             http_client=http_client,
@@ -501,17 +578,19 @@ async def _get_azure_ad_token(self) -> str | None:
             token = provider()
             if inspect.isawaitable(token):
                 token = await token
-            if not token or not isinstance(token, str):
+            if not token or not isinstance(cast(Any, token), str):
                 raise ValueError(
                     f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}",
                 )
-            return token
+            return str(token)
 
         return None
 
     @override
-    async def _prepare_options(self, options: FinalRequestOptions) -> None:
+    async def _prepare_options(self, options: FinalRequestOptions) -> FinalRequestOptions:
         headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
+
+        options = model_copy(options)
         options.headers = headers
 
         azure_ad_token = await self._get_azure_ad_token()
@@ -525,4 +604,29 @@ async def _prepare_options(self, options: FinalRequestOptions) -> None:
             # should never be hit
             raise ValueError("Unable to handle auth")
 
-        return await super()._prepare_options(options)
+        return options
+
+    async def _configure_realtime(self, model: str, extra_query: Query) -> tuple[httpx.URL, dict[str, str]]:
+        auth_headers = {}
+        query = {
+            **extra_query,
+            "api-version": self._api_version,
+            "deployment": self._azure_deployment or model,
+        }
+        if self.api_key != "":
+            auth_headers = {"api-key": self.api_key}
+        else:
+            token = await self._get_azure_ad_token()
+            if token:
+                auth_headers = {"Authorization": f"Bearer {token}"}
+
+        if self.websocket_base_url is not None:
+            base_url = httpx.URL(self.websocket_base_url)
+            merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+            realtime_url = base_url.copy_with(raw_path=merge_raw_path)
+        else:
+            base_url = self._prepare_url("http://github.com/realtime")
+            realtime_url = base_url.copy_with(scheme="wss")
+
+        url = realtime_url.copy_with(params={**query})
+        return url, auth_headers
diff
--git a/src/openai/lib/streaming/__init__.py b/src/openai/lib/streaming/__init__.py new file mode 100644 index 0000000000..eb378d2561 --- /dev/null +++ b/src/openai/lib/streaming/__init__.py @@ -0,0 +1,8 @@ +from ._assistants import ( + AssistantEventHandler as AssistantEventHandler, + AssistantEventHandlerT as AssistantEventHandlerT, + AssistantStreamManager as AssistantStreamManager, + AsyncAssistantEventHandler as AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager as AsyncAssistantStreamManager, +) diff --git a/src/openai/lib/streaming/_assistants.py b/src/openai/lib/streaming/_assistants.py new file mode 100644 index 0000000000..6efb3ca3f1 --- /dev/null +++ b/src/openai/lib/streaming/_assistants.py @@ -0,0 +1,1038 @@ +from __future__ import annotations + +import asyncio +from types import TracebackType +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast +from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never + +import httpx + +from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator +from ..._compat import model_dump +from ..._models import construct_type +from ..._streaming import Stream, AsyncStream +from ...types.beta import AssistantStreamEvent +from ...types.beta.threads import ( + Run, + Text, + Message, + ImageFile, + TextDelta, + MessageDelta, + MessageContent, + MessageContentDelta, +) +from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta + + +class AssistantEventHandler: + text_deltas: Iterable[str] + """Iterator over just the text deltas in the stream. + + This corresponds to the `thread.message.delta` event + in the API. + + ```py + for text in stream.text_deltas: + print(text, end="", flush=True) + print() + ``` + """ + + def __init__(self) -> None: + self._current_event: AssistantStreamEvent | None = None + self._current_message_content_index: int | None = None + self._current_message_content: MessageContent | None = None + self._current_tool_call_index: int | None = None + self._current_tool_call: ToolCall | None = None + self.__current_run_step_id: str | None = None + self.__current_run: Run | None = None + self.__run_step_snapshots: dict[str, RunStep] = {} + self.__message_snapshots: dict[str, Message] = {} + self.__current_message_snapshot: Message | None = None + + self.text_deltas = self.__text_deltas__() + self._iterator = self.__stream__() + self.__stream: Stream[AssistantStreamEvent] | None = None + + def _init(self, stream: Stream[AssistantStreamEvent]) -> None: + if self.__stream: + raise RuntimeError( + "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" + ) + + self.__stream = stream + + def __next__(self) -> AssistantStreamEvent: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[AssistantStreamEvent]: + for item in self._iterator: + yield item + + @property + def current_event(self) -> AssistantStreamEvent | None: + return self._current_event + + @property + def current_run(self) -> Run | None: + return self.__current_run + + @property + def current_run_step_snapshot(self) -> RunStep | None: + if not self.__current_run_step_id: + return None + + return self.__run_step_snapshots[self.__current_run_step_id] + + @property + def current_message_snapshot(self) -> Message | None: + return self.__current_message_snapshot + + def close(self) -> None: + """ + Close the response 
and release the connection. + + Automatically called when the context manager exits. + """ + if self.__stream: + self.__stream.close() + + def until_done(self) -> None: + """Waits until the stream has been consumed""" + consume_sync_iterator(self) + + def get_final_run(self) -> Run: + """Wait for the stream to finish and returns the completed Run object""" + self.until_done() + + if not self.__current_run: + raise RuntimeError("No final run object found") + + return self.__current_run + + def get_final_run_steps(self) -> list[RunStep]: + """Wait for the stream to finish and returns the steps taken in this run""" + self.until_done() + + if not self.__run_step_snapshots: + raise RuntimeError("No run steps found") + + return [step for step in self.__run_step_snapshots.values()] + + def get_final_messages(self) -> list[Message]: + """Wait for the stream to finish and returns the messages emitted in this run""" + self.until_done() + + if not self.__message_snapshots: + raise RuntimeError("No messages found") + + return [message for message in self.__message_snapshots.values()] + + def __text_deltas__(self) -> Iterator[str]: + for event in self: + if event.event != "thread.message.delta": + continue + + for content_delta in event.data.delta.content or []: + if content_delta.type == "text" and content_delta.text and content_delta.text.value: + yield content_delta.text.value + + # event handlers + + def on_end(self) -> None: + """Fires when the stream has finished. + + This happens if the stream is read to completion + or if an exception occurs during iteration. + """ + + def on_event(self, event: AssistantStreamEvent) -> None: + """Callback that is fired for every Server-Sent-Event""" + + def on_run_step_created(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is created""" + + def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + """Callback that is fired whenever a run step delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the run step. 
For example, a tool calls event may + look like this: + + # delta + tool_calls=[ + RunStepDeltaToolCallsCodeInterpreter( + index=0, + type='code_interpreter', + id=None, + code_interpreter=CodeInterpreter(input=' sympy', outputs=None) + ) + ] + # snapshot + tool_calls=[ + CodeToolCall( + id='call_wKayJlcYV12NiadiZuJXxcfx', + code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), + type='code_interpreter', + index=0 + ) + ], + """ + + def on_run_step_done(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is completed""" + + def on_tool_call_created(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call is created""" + + def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + def on_tool_call_done(self, tool_call: ToolCall) -> None: + """Callback that is fired when a tool call delta is encountered""" + + def on_exception(self, exception: Exception) -> None: + """Fired whenever an exception happens during streaming""" + + def on_timeout(self) -> None: + """Fires if the request times out""" + + def on_message_created(self, message: Message) -> None: + """Callback that is fired when a message is created""" + + def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: + """Callback that is fired whenever a message delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the message. For example, a text content event may + look like this: + + # delta + MessageDeltaText( + index=0, + type='text', + text=Text( + value=' Jane' + ), + ) + # snapshot + MessageContentText( + index=0, + type='text', + text=Text( + value='Certainly, Jane' + ), + ) + """ + + def on_message_done(self, message: Message) -> None: + """Callback that is fired when a message is completed""" + + def on_text_created(self, text: Text) -> None: + """Callback that is fired when a text content block is created""" + + def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: + """Callback that is fired whenever a text content delta is returned + by the API. + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the text. 
For example: + + on_text_delta(TextDelta(value="The"), Text(value="The")), + on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), + on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), + on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), + on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equation")), + """ + + def on_text_done(self, text: Text) -> None: + """Callback that is fired when a text content block is finished""" + + def on_image_file_done(self, image_file: ImageFile) -> None: + """Callback that is fired when an image file block is finished""" + + def _emit_sse_event(self, event: AssistantStreamEvent) -> None: + self._current_event = event + self.on_event(event) + + self.__current_message_snapshot, new_content = accumulate_event( + event=event, + current_message_snapshot=self.__current_message_snapshot, + ) + if self.__current_message_snapshot is not None: + self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot + + accumulate_run_step( + event=event, + run_step_snapshots=self.__run_step_snapshots, + ) + + for content_delta in new_content: + assert self.__current_message_snapshot is not None + + block = self.__current_message_snapshot.content[content_delta.index] + if block.type == "text": + self.on_text_created(block.text) + + if ( + event.event == "thread.run.completed" + or event.event == "thread.run.cancelled" + or event.event == "thread.run.expired" + or event.event == "thread.run.failed" + or event.event == "thread.run.requires_action" + or event.event == "thread.run.incomplete" + ): + self.__current_run = event.data + if self._current_tool_call: + self.on_tool_call_done(self._current_tool_call) + elif ( + event.event == "thread.run.created" + or event.event == "thread.run.in_progress" + or event.event == "thread.run.cancelling" + or event.event == "thread.run.queued" + ): + self.__current_run = event.data + elif event.event == "thread.message.created": + self.on_message_created(event.data) + elif event.event == "thread.message.delta": + snapshot = self.__current_message_snapshot + assert snapshot is not None + + message_delta = event.data.delta + if message_delta.content is not None: + for content_delta in message_delta.content: + if content_delta.type == "text" and content_delta.text: + snapshot_content = snapshot.content[content_delta.index] + assert snapshot_content.type == "text" + self.on_text_delta(content_delta.text, snapshot_content.text) + + # If the delta is for a new message content: + # - emit on_text_done/on_image_file_done for the previous message content + # - emit on_text_created/on_image_created for the new message content + if content_delta.index != self._current_message_content_index: + if self._current_message_content is not None: + if self._current_message_content.type == "text": + self.on_text_done(self._current_message_content.text) + elif self._current_message_content.type == "image_file": + self.on_image_file_done(self._current_message_content.image_file) + + self._current_message_content_index = content_delta.index + self._current_message_content = snapshot.content[content_delta.index] + + # Update the current_message_content (delta event is correctly emitted already) + self._current_message_content = snapshot.content[content_delta.index] + + self.on_message_delta(event.data.delta, snapshot) + elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": + self.__current_message_snapshot = 
event.data + self.__message_snapshots[event.data.id] = event.data + + if self._current_message_content_index is not None: + content = event.data.content[self._current_message_content_index] + if content.type == "text": + self.on_text_done(content.text) + elif content.type == "image_file": + self.on_image_file_done(content.image_file) + + self.on_message_done(event.data) + elif event.event == "thread.run.step.created": + self.__current_run_step_id = event.data.id + self.on_run_step_created(event.data) + elif event.event == "thread.run.step.in_progress": + self.__current_run_step_id = event.data.id + elif event.event == "thread.run.step.delta": + step_snapshot = self.__run_step_snapshots[event.data.id] + + run_step_delta = event.data.delta + if ( + run_step_delta.step_details + and run_step_delta.step_details.type == "tool_calls" + and run_step_delta.step_details.tool_calls is not None + ): + assert step_snapshot.step_details.type == "tool_calls" + for tool_call_delta in run_step_delta.step_details.tool_calls: + if tool_call_delta.index == self._current_tool_call_index: + self.on_tool_call_delta( + tool_call_delta, + step_snapshot.step_details.tool_calls[tool_call_delta.index], + ) + + # If the delta is for a new tool call: + # - emit on_tool_call_done for the previous tool_call + # - emit on_tool_call_created for the new tool_call + if tool_call_delta.index != self._current_tool_call_index: + if self._current_tool_call is not None: + self.on_tool_call_done(self._current_tool_call) + + self._current_tool_call_index = tool_call_delta.index + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + self.on_tool_call_created(self._current_tool_call) + + # Update the current_tool_call (delta event is correctly emitted already) + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + + self.on_run_step_delta( + event.data.delta, + step_snapshot, + ) + elif ( + event.event == "thread.run.step.completed" + or event.event == "thread.run.step.cancelled" + or event.event == "thread.run.step.expired" + or event.event == "thread.run.step.failed" + ): + if self._current_tool_call: + self.on_tool_call_done(self._current_tool_call) + + self.on_run_step_done(event.data) + self.__current_run_step_id = None + elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": + # currently no special handling + ... + else: + # we only want to error at build-time + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(event) + + self._current_event = None + + def __stream__(self) -> Iterator[AssistantStreamEvent]: + stream = self.__stream + if not stream: + raise RuntimeError("Stream has not been started yet") + + try: + for event in stream: + self._emit_sse_event(event) + + yield event + except (httpx.TimeoutException, asyncio.TimeoutError) as exc: + self.on_timeout() + self.on_exception(exc) + raise + except Exception as exc: + self.on_exception(exc) + raise + finally: + self.on_end() + + +AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler) + + +class AssistantStreamManager(Generic[AssistantEventHandlerT]): + """Wrapper over AssistantStreamEventHandler that is returned by `.stream()` + so that a context manager can be used. + + ```py + with client.threads.create_and_run_stream(...) as stream: + for event in stream: + ... 
+ ``` + """ + + def __init__( + self, + api_request: Callable[[], Stream[AssistantStreamEvent]], + *, + event_handler: AssistantEventHandlerT, + ) -> None: + self.__stream: Stream[AssistantStreamEvent] | None = None + self.__event_handler = event_handler + self.__api_request = api_request + + def __enter__(self) -> AssistantEventHandlerT: + self.__stream = self.__api_request() + self.__event_handler._init(self.__stream) + return self.__event_handler + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + self.__stream.close() + + +class AsyncAssistantEventHandler: + text_deltas: AsyncIterable[str] + """Iterator over just the text deltas in the stream. + + This corresponds to the `thread.message.delta` event + in the API. + + ```py + async for text in stream.text_deltas: + print(text, end="", flush=True) + print() + ``` + """ + + def __init__(self) -> None: + self._current_event: AssistantStreamEvent | None = None + self._current_message_content_index: int | None = None + self._current_message_content: MessageContent | None = None + self._current_tool_call_index: int | None = None + self._current_tool_call: ToolCall | None = None + self.__current_run_step_id: str | None = None + self.__current_run: Run | None = None + self.__run_step_snapshots: dict[str, RunStep] = {} + self.__message_snapshots: dict[str, Message] = {} + self.__current_message_snapshot: Message | None = None + + self.text_deltas = self.__text_deltas__() + self._iterator = self.__stream__() + self.__stream: AsyncStream[AssistantStreamEvent] | None = None + + def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None: + if self.__stream: + raise RuntimeError( + "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" + ) + + self.__stream = stream + + async def __anext__(self) -> AssistantStreamEvent: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]: + async for item in self._iterator: + yield item + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called when the context manager exits. 
+ """ + if self.__stream: + await self.__stream.close() + + @property + def current_event(self) -> AssistantStreamEvent | None: + return self._current_event + + @property + def current_run(self) -> Run | None: + return self.__current_run + + @property + def current_run_step_snapshot(self) -> RunStep | None: + if not self.__current_run_step_id: + return None + + return self.__run_step_snapshots[self.__current_run_step_id] + + @property + def current_message_snapshot(self) -> Message | None: + return self.__current_message_snapshot + + async def until_done(self) -> None: + """Waits until the stream has been consumed""" + await consume_async_iterator(self) + + async def get_final_run(self) -> Run: + """Wait for the stream to finish and returns the completed Run object""" + await self.until_done() + + if not self.__current_run: + raise RuntimeError("No final run object found") + + return self.__current_run + + async def get_final_run_steps(self) -> list[RunStep]: + """Wait for the stream to finish and returns the steps taken in this run""" + await self.until_done() + + if not self.__run_step_snapshots: + raise RuntimeError("No run steps found") + + return [step for step in self.__run_step_snapshots.values()] + + async def get_final_messages(self) -> list[Message]: + """Wait for the stream to finish and returns the messages emitted in this run""" + await self.until_done() + + if not self.__message_snapshots: + raise RuntimeError("No messages found") + + return [message for message in self.__message_snapshots.values()] + + async def __text_deltas__(self) -> AsyncIterator[str]: + async for event in self: + if event.event != "thread.message.delta": + continue + + for content_delta in event.data.delta.content or []: + if content_delta.type == "text" and content_delta.text and content_delta.text.value: + yield content_delta.text.value + + # event handlers + + async def on_end(self) -> None: + """Fires when the stream has finished. + + This happens if the stream is read to completion + or if an exception occurs during iteration. + """ + + async def on_event(self, event: AssistantStreamEvent) -> None: + """Callback that is fired for every Server-Sent-Event""" + + async def on_run_step_created(self, run_step: RunStep) -> None: + """Callback that is fired when a run step is created""" + + async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: + """Callback that is fired whenever a run step delta is returned from the API + + The first argument is just the delta as sent by the API and the second argument + is the accumulated snapshot of the run step. 
For example, a tool calls event may
+        look like this:
+
+        # delta
+        tool_calls=[
+            RunStepDeltaToolCallsCodeInterpreter(
+                index=0,
+                type='code_interpreter',
+                id=None,
+                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
+            )
+        ]
+        # snapshot
+        tool_calls=[
+            CodeToolCall(
+                id='call_wKayJlcYV12NiadiZuJXxcfx',
+                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
+                type='code_interpreter',
+                index=0
+            )
+        ],
+        """
+
+    async def on_run_step_done(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is completed"""
+
+    async def on_tool_call_created(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is created"""
+
+    async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    async def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is completed"""
+
+    async def on_exception(self, exception: Exception) -> None:
+        """Fired whenever an exception happens during streaming"""
+
+    async def on_timeout(self) -> None:
+        """Fires if the request times out"""
+
+    async def on_message_created(self, message: Message) -> None:
+        """Callback that is fired when a message is created"""
+
+    async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
+        """Callback that is fired whenever a message delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the message. For example, a text content event may
+        look like this:
+
+        # delta
+        MessageDeltaText(
+            index=0,
+            type='text',
+            text=Text(
+                value=' Jane'
+            ),
+        )
+        # snapshot
+        MessageContentText(
+            index=0,
+            type='text',
+            text=Text(
+                value='Certainly, Jane'
+            ),
+        )
+        """
+
+    async def on_message_done(self, message: Message) -> None:
+        """Callback that is fired when a message is completed"""
+
+    async def on_text_created(self, text: Text) -> None:
+        """Callback that is fired when a text content block is created"""
+
+    async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Callback that is fired whenever a text content delta is returned
+        by the API.
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the text.
For example: + + on_text_delta(TextDelta(value="The"), Text(value="The")), + on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), + on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), + on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), + on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), + """ + + async def on_text_done(self, text: Text) -> None: + """Callback that is fired when a text content block is finished""" + + async def on_image_file_done(self, image_file: ImageFile) -> None: + """Callback that is fired when an image file block is finished""" + + async def _emit_sse_event(self, event: AssistantStreamEvent) -> None: + self._current_event = event + await self.on_event(event) + + self.__current_message_snapshot, new_content = accumulate_event( + event=event, + current_message_snapshot=self.__current_message_snapshot, + ) + if self.__current_message_snapshot is not None: + self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot + + accumulate_run_step( + event=event, + run_step_snapshots=self.__run_step_snapshots, + ) + + for content_delta in new_content: + assert self.__current_message_snapshot is not None + + block = self.__current_message_snapshot.content[content_delta.index] + if block.type == "text": + await self.on_text_created(block.text) + + if ( + event.event == "thread.run.completed" + or event.event == "thread.run.cancelled" + or event.event == "thread.run.expired" + or event.event == "thread.run.failed" + or event.event == "thread.run.requires_action" + or event.event == "thread.run.incomplete" + ): + self.__current_run = event.data + if self._current_tool_call: + await self.on_tool_call_done(self._current_tool_call) + elif ( + event.event == "thread.run.created" + or event.event == "thread.run.in_progress" + or event.event == "thread.run.cancelling" + or event.event == "thread.run.queued" + ): + self.__current_run = event.data + elif event.event == "thread.message.created": + await self.on_message_created(event.data) + elif event.event == "thread.message.delta": + snapshot = self.__current_message_snapshot + assert snapshot is not None + + message_delta = event.data.delta + if message_delta.content is not None: + for content_delta in message_delta.content: + if content_delta.type == "text" and content_delta.text: + snapshot_content = snapshot.content[content_delta.index] + assert snapshot_content.type == "text" + await self.on_text_delta(content_delta.text, snapshot_content.text) + + # If the delta is for a new message content: + # - emit on_text_done/on_image_file_done for the previous message content + # - emit on_text_created/on_image_created for the new message content + if content_delta.index != self._current_message_content_index: + if self._current_message_content is not None: + if self._current_message_content.type == "text": + await self.on_text_done(self._current_message_content.text) + elif self._current_message_content.type == "image_file": + await self.on_image_file_done(self._current_message_content.image_file) + + self._current_message_content_index = content_delta.index + self._current_message_content = snapshot.content[content_delta.index] + + # Update the current_message_content (delta event is correctly emitted already) + self._current_message_content = snapshot.content[content_delta.index] + + await self.on_message_delta(event.data.delta, snapshot) + elif event.event == "thread.message.completed" or event.event == 
"thread.message.incomplete": + self.__current_message_snapshot = event.data + self.__message_snapshots[event.data.id] = event.data + + if self._current_message_content_index is not None: + content = event.data.content[self._current_message_content_index] + if content.type == "text": + await self.on_text_done(content.text) + elif content.type == "image_file": + await self.on_image_file_done(content.image_file) + + await self.on_message_done(event.data) + elif event.event == "thread.run.step.created": + self.__current_run_step_id = event.data.id + await self.on_run_step_created(event.data) + elif event.event == "thread.run.step.in_progress": + self.__current_run_step_id = event.data.id + elif event.event == "thread.run.step.delta": + step_snapshot = self.__run_step_snapshots[event.data.id] + + run_step_delta = event.data.delta + if ( + run_step_delta.step_details + and run_step_delta.step_details.type == "tool_calls" + and run_step_delta.step_details.tool_calls is not None + ): + assert step_snapshot.step_details.type == "tool_calls" + for tool_call_delta in run_step_delta.step_details.tool_calls: + if tool_call_delta.index == self._current_tool_call_index: + await self.on_tool_call_delta( + tool_call_delta, + step_snapshot.step_details.tool_calls[tool_call_delta.index], + ) + + # If the delta is for a new tool call: + # - emit on_tool_call_done for the previous tool_call + # - emit on_tool_call_created for the new tool_call + if tool_call_delta.index != self._current_tool_call_index: + if self._current_tool_call is not None: + await self.on_tool_call_done(self._current_tool_call) + + self._current_tool_call_index = tool_call_delta.index + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + await self.on_tool_call_created(self._current_tool_call) + + # Update the current_tool_call (delta event is correctly emitted already) + self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] + + await self.on_run_step_delta( + event.data.delta, + step_snapshot, + ) + elif ( + event.event == "thread.run.step.completed" + or event.event == "thread.run.step.cancelled" + or event.event == "thread.run.step.expired" + or event.event == "thread.run.step.failed" + ): + if self._current_tool_call: + await self.on_tool_call_done(self._current_tool_call) + + await self.on_run_step_done(event.data) + self.__current_run_step_id = None + elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": + # currently no special handling + ... 
+ else: + # we only want to error at build-time + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(event) + + self._current_event = None + + async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]: + stream = self.__stream + if not stream: + raise RuntimeError("Stream has not been started yet") + + try: + async for event in stream: + await self._emit_sse_event(event) + + yield event + except (httpx.TimeoutException, asyncio.TimeoutError) as exc: + await self.on_timeout() + await self.on_exception(exc) + raise + except Exception as exc: + await self.on_exception(exc) + raise + finally: + await self.on_end() + + +AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler) + + +class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]): + """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()` + so that an async context manager can be used without `await`ing the + original client call. + + ```py + async with client.threads.create_and_run_stream(...) as stream: + async for event in stream: + ... + ``` + """ + + def __init__( + self, + api_request: Awaitable[AsyncStream[AssistantStreamEvent]], + *, + event_handler: AsyncAssistantEventHandlerT, + ) -> None: + self.__stream: AsyncStream[AssistantStreamEvent] | None = None + self.__event_handler = event_handler + self.__api_request = api_request + + async def __aenter__(self) -> AsyncAssistantEventHandlerT: + self.__stream = await self.__api_request + self.__event_handler._init(self.__stream) + return self.__event_handler + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + await self.__stream.close() + + +def accumulate_run_step( + *, + event: AssistantStreamEvent, + run_step_snapshots: dict[str, RunStep], +) -> None: + if event.event == "thread.run.step.created": + run_step_snapshots[event.data.id] = event.data + return + + if event.event == "thread.run.step.delta": + data = event.data + snapshot = run_step_snapshots[data.id] + + if data.delta: + merged = accumulate_delta( + cast( + "dict[object, object]", + model_dump(snapshot, exclude_unset=True, warnings=False), + ), + cast( + "dict[object, object]", + model_dump(data.delta, exclude_unset=True, warnings=False), + ), + ) + run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged)) + + return None + + +def accumulate_event( + *, + event: AssistantStreamEvent, + current_message_snapshot: Message | None, +) -> tuple[Message | None, list[MessageContentDelta]]: + """Returns a tuple of message snapshot and newly created text message deltas""" + if event.event == "thread.message.created": + return event.data, [] + + new_content: list[MessageContentDelta] = [] + + if event.event != "thread.message.delta": + return current_message_snapshot, [] + + if not current_message_snapshot: + raise RuntimeError("Encountered a message delta with no previous snapshot") + + data = event.data + if data.delta.content: + for content_delta in data.delta.content: + try: + block = current_message_snapshot.content[content_delta.index] + except IndexError: + current_message_snapshot.content.insert( + content_delta.index, + cast( + MessageContent, + construct_type( + # mypy doesn't allow Content for some reason + type_=cast(Any, MessageContent), + value=model_dump(content_delta, exclude_unset=True, warnings=False), + ), + ), + ) + new_content.append(content_delta) 
+ else: + merged = accumulate_delta( + cast( + "dict[object, object]", + model_dump(block, exclude_unset=True, warnings=False), + ), + cast( + "dict[object, object]", + model_dump(content_delta, exclude_unset=True, warnings=False), + ), + ) + current_message_snapshot.content[content_delta.index] = cast( + MessageContent, + construct_type( + # mypy doesn't allow Content for some reason + type_=cast(Any, MessageContent), + value=merged, + ), + ) + + return current_message_snapshot, new_content + + +def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]: + for key, delta_value in delta.items(): + if key not in acc: + acc[key] = delta_value + continue + + acc_value = acc[key] + if acc_value is None: + acc[key] = delta_value + continue + + # the `index` property is used in arrays of objects so it should + # not be accumulated like other values e.g. + # [{'foo': 'bar', 'index': 0}] + # + # the same applies to `type` properties as they're used for + # discriminated unions + if key == "index" or key == "type": + acc[key] = delta_value + continue + + if isinstance(acc_value, str) and isinstance(delta_value, str): + acc_value += delta_value + elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)): + acc_value += delta_value + elif is_dict(acc_value) and is_dict(delta_value): + acc_value = accumulate_delta(acc_value, delta_value) + elif is_list(acc_value) and is_list(delta_value): + # for lists of non-dictionary items we'll only ever get new entries + # in the array, existing entries will never be changed + if all(isinstance(x, (str, int, float)) for x in acc_value): + acc_value.extend(delta_value) + continue + + for delta_entry in delta_value: + if not is_dict(delta_entry): + raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}") + + try: + index = delta_entry["index"] + except KeyError as exc: + raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc + + if not isinstance(index, int): + raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}") + + try: + acc_entry = acc_value[index] + except IndexError: + acc_value.insert(index, delta_entry) + else: + if not is_dict(acc_entry): + raise TypeError("not handled yet") + + acc_value[index] = accumulate_delta(acc_entry, delta_entry) + + acc[key] = acc_value + + return acc diff --git a/src/openai/lib/streaming/_deltas.py b/src/openai/lib/streaming/_deltas.py new file mode 100644 index 0000000000..a5e1317612 --- /dev/null +++ b/src/openai/lib/streaming/_deltas.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from ..._utils import is_dict, is_list + + +def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]: + for key, delta_value in delta.items(): + if key not in acc: + acc[key] = delta_value + continue + + acc_value = acc[key] + if acc_value is None: + acc[key] = delta_value + continue + + # the `index` property is used in arrays of objects so it should + # not be accumulated like other values e.g. 
+ # [{'foo': 'bar', 'index': 0}] + # + # the same applies to `type` properties as they're used for + # discriminated unions + if key == "index" or key == "type": + acc[key] = delta_value + continue + + if isinstance(acc_value, str) and isinstance(delta_value, str): + acc_value += delta_value + elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)): + acc_value += delta_value + elif is_dict(acc_value) and is_dict(delta_value): + acc_value = accumulate_delta(acc_value, delta_value) + elif is_list(acc_value) and is_list(delta_value): + # for lists of non-dictionary items we'll only ever get new entries + # in the array, existing entries will never be changed + if all(isinstance(x, (str, int, float)) for x in acc_value): + acc_value.extend(delta_value) + continue + + for delta_entry in delta_value: + if not is_dict(delta_entry): + raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}") + + try: + index = delta_entry["index"] + except KeyError as exc: + raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc + + if not isinstance(index, int): + raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}") + + try: + acc_entry = acc_value[index] + except IndexError: + acc_value.insert(index, delta_entry) + else: + if not is_dict(acc_entry): + raise TypeError("not handled yet") + + acc_value[index] = accumulate_delta(acc_entry, delta_entry) + + acc[key] = acc_value + + return acc diff --git a/src/openai/lib/streaming/chat/__init__.py b/src/openai/lib/streaming/chat/__init__.py new file mode 100644 index 0000000000..dfa3f3f2e3 --- /dev/null +++ b/src/openai/lib/streaming/chat/__init__.py @@ -0,0 +1,27 @@ +from ._types import ( + ParsedChoiceSnapshot as ParsedChoiceSnapshot, + ParsedChatCompletionSnapshot as ParsedChatCompletionSnapshot, + ParsedChatCompletionMessageSnapshot as ParsedChatCompletionMessageSnapshot, +) +from ._events import ( + ChunkEvent as ChunkEvent, + ContentDoneEvent as ContentDoneEvent, + RefusalDoneEvent as RefusalDoneEvent, + ContentDeltaEvent as ContentDeltaEvent, + RefusalDeltaEvent as RefusalDeltaEvent, + LogprobsContentDoneEvent as LogprobsContentDoneEvent, + LogprobsRefusalDoneEvent as LogprobsRefusalDoneEvent, + ChatCompletionStreamEvent as ChatCompletionStreamEvent, + LogprobsContentDeltaEvent as LogprobsContentDeltaEvent, + LogprobsRefusalDeltaEvent as LogprobsRefusalDeltaEvent, + ParsedChatCompletionSnapshot as ParsedChatCompletionSnapshot, + FunctionToolCallArgumentsDoneEvent as FunctionToolCallArgumentsDoneEvent, + FunctionToolCallArgumentsDeltaEvent as FunctionToolCallArgumentsDeltaEvent, +) +from ._completions import ( + ChatCompletionStream as ChatCompletionStream, + AsyncChatCompletionStream as AsyncChatCompletionStream, + ChatCompletionStreamState as ChatCompletionStreamState, + ChatCompletionStreamManager as ChatCompletionStreamManager, + AsyncChatCompletionStreamManager as AsyncChatCompletionStreamManager, +) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py new file mode 100644 index 0000000000..a7b70c32d3 --- /dev/null +++ b/src/openai/lib/streaming/chat/_completions.py @@ -0,0 +1,770 @@ +from __future__ import annotations + +import inspect +from types import TracebackType +from typing import TYPE_CHECKING, Any, Generic, Callable, Iterable, Awaitable, AsyncIterator, cast +from typing_extensions import Self, Iterator, assert_never + +from jiter import from_json + +from ._types 
import ParsedChoiceSnapshot, ParsedChatCompletionSnapshot, ParsedChatCompletionMessageSnapshot +from ._events import ( + ChunkEvent, + ContentDoneEvent, + RefusalDoneEvent, + ContentDeltaEvent, + RefusalDeltaEvent, + LogprobsContentDoneEvent, + LogprobsRefusalDoneEvent, + ChatCompletionStreamEvent, + LogprobsContentDeltaEvent, + LogprobsRefusalDeltaEvent, + FunctionToolCallArgumentsDoneEvent, + FunctionToolCallArgumentsDeltaEvent, +) +from .._deltas import accumulate_delta +from ...._types import NOT_GIVEN, IncEx, NotGiven +from ...._utils import is_given, consume_sync_iterator, consume_async_iterator +from ...._compat import model_dump +from ...._models import build, construct_type +from ..._parsing import ( + ResponseFormatT, + has_parseable_input, + maybe_parse_content, + parse_chat_completion, + get_input_tool_by_name, + solve_response_format_t, + parse_function_tool_arguments, +) +from ...._streaming import Stream, AsyncStream +from ....types.chat import ChatCompletionChunk, ParsedChatCompletion, ChatCompletionToolParam +from ...._exceptions import LengthFinishReasonError, ContentFilterFinishReasonError +from ....types.chat.chat_completion import ChoiceLogprobs +from ....types.chat.chat_completion_chunk import Choice as ChoiceChunk +from ....types.chat.completion_create_params import ResponseFormat as ResponseFormatParam + + +class ChatCompletionStream(Generic[ResponseFormatT]): + """Wrapper over the Chat Completions streaming API that adds helpful + events such as `content.done`, supports automatically parsing + responses & tool calls and accumulates a `ChatCompletion` object + from each individual chunk. + + https://platform.openai.com/docs/api-reference/streaming + """ + + def __init__( + self, + *, + raw_stream: Stream[ChatCompletionChunk], + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven, + ) -> None: + self._raw_stream = raw_stream + self._response = raw_stream.response + self._iterator = self.__stream__() + self._state = ChatCompletionStreamState(response_format=response_format, input_tools=input_tools) + + def __next__(self) -> ChatCompletionStreamEvent[ResponseFormatT]: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]: + for item in self._iterator: + yield item + + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self._response.close() + + def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]: + """Waits until the stream has been read to completion and returns + the accumulated `ParsedChatCompletion` object. + + If you passed a class type to `.stream()`, the `completion.choices[0].message.parsed` + property will be the content deserialised into that class, if there was any content returned + by the API. 
+ """ + self.until_done() + return self._state.get_final_completion() + + def until_done(self) -> Self: + """Blocks until the stream has been consumed.""" + consume_sync_iterator(self) + return self + + @property + def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot: + return self._state.current_completion_snapshot + + def __stream__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]: + for sse_event in self._raw_stream: + if not _is_valid_chat_completion_chunk_weak(sse_event): + continue + events_to_fire = self._state.handle_chunk(sse_event) + for event in events_to_fire: + yield event + + +class ChatCompletionStreamManager(Generic[ResponseFormatT]): + """Context manager over a `ChatCompletionStream` that is returned by `.stream()`. + + This context manager ensures the response cannot be leaked if you don't read + the stream to completion. + + Usage: + ```py + with client.beta.chat.completions.stream(...) as stream: + for event in stream: + ... + ``` + """ + + def __init__( + self, + api_request: Callable[[], Stream[ChatCompletionChunk]], + *, + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven, + ) -> None: + self.__stream: ChatCompletionStream[ResponseFormatT] | None = None + self.__api_request = api_request + self.__response_format = response_format + self.__input_tools = input_tools + + def __enter__(self) -> ChatCompletionStream[ResponseFormatT]: + raw_stream = self.__api_request() + + self.__stream = ChatCompletionStream( + raw_stream=raw_stream, + response_format=self.__response_format, + input_tools=self.__input_tools, + ) + + return self.__stream + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + self.__stream.close() + + +class AsyncChatCompletionStream(Generic[ResponseFormatT]): + """Wrapper over the Chat Completions streaming API that adds helpful + events such as `content.done`, supports automatically parsing + responses & tool calls and accumulates a `ChatCompletion` object + from each individual chunk. + + https://platform.openai.com/docs/api-reference/streaming + """ + + def __init__( + self, + *, + raw_stream: AsyncStream[ChatCompletionChunk], + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven, + ) -> None: + self._raw_stream = raw_stream + self._response = raw_stream.response + self._iterator = self.__stream__() + self._state = ChatCompletionStreamState(response_format=response_format, input_tools=input_tools) + + async def __anext__(self) -> ChatCompletionStreamEvent[ResponseFormatT]: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[ChatCompletionStreamEvent[ResponseFormatT]]: + async for item in self._iterator: + yield item + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. 
+ """ + await self._response.aclose() + + async def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]: + """Waits until the stream has been read to completion and returns + the accumulated `ParsedChatCompletion` object. + + If you passed a class type to `.stream()`, the `completion.choices[0].message.parsed` + property will be the content deserialised into that class, if there was any content returned + by the API. + """ + await self.until_done() + return self._state.get_final_completion() + + async def until_done(self) -> Self: + """Blocks until the stream has been consumed.""" + await consume_async_iterator(self) + return self + + @property + def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot: + return self._state.current_completion_snapshot + + async def __stream__(self) -> AsyncIterator[ChatCompletionStreamEvent[ResponseFormatT]]: + async for sse_event in self._raw_stream: + if not _is_valid_chat_completion_chunk_weak(sse_event): + continue + events_to_fire = self._state.handle_chunk(sse_event) + for event in events_to_fire: + yield event + + +class AsyncChatCompletionStreamManager(Generic[ResponseFormatT]): + """Context manager over a `AsyncChatCompletionStream` that is returned by `.stream()`. + + This context manager ensures the response cannot be leaked if you don't read + the stream to completion. + + Usage: + ```py + async with client.beta.chat.completions.stream(...) as stream: + for event in stream: + ... + ``` + """ + + def __init__( + self, + api_request: Awaitable[AsyncStream[ChatCompletionChunk]], + *, + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + input_tools: Iterable[ChatCompletionToolParam] | NotGiven, + ) -> None: + self.__stream: AsyncChatCompletionStream[ResponseFormatT] | None = None + self.__api_request = api_request + self.__response_format = response_format + self.__input_tools = input_tools + + async def __aenter__(self) -> AsyncChatCompletionStream[ResponseFormatT]: + raw_stream = await self.__api_request + + self.__stream = AsyncChatCompletionStream( + raw_stream=raw_stream, + response_format=self.__response_format, + input_tools=self.__input_tools, + ) + + return self.__stream + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + await self.__stream.close() + + +class ChatCompletionStreamState(Generic[ResponseFormatT]): + """Helper class for manually accumulating `ChatCompletionChunk`s into a final `ChatCompletion` object. + + This is useful in cases where you can't always use the `.stream()` method, e.g. 
+
+    ```py
+    from openai.lib.streaming.chat import ChatCompletionStreamState
+
+    state = ChatCompletionStreamState()
+
+    stream = client.chat.completions.create(..., stream=True)
+    for chunk in stream:
+        state.handle_chunk(chunk)
+
+    # can also access the accumulated `ChatCompletion` mid-stream
+    state.current_completion_snapshot
+
+    print(state.get_final_completion())
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        input_tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven = NOT_GIVEN,
+    ) -> None:
+        self.__current_completion_snapshot: ParsedChatCompletionSnapshot | None = None
+        self.__choice_event_states: list[ChoiceEventState] = []
+
+        self._input_tools = [tool for tool in input_tools] if is_given(input_tools) else []
+        self._response_format = response_format
+        self._rich_response_format: type | NotGiven = response_format if inspect.isclass(response_format) else NOT_GIVEN
+
+    def get_final_completion(self) -> ParsedChatCompletion[ResponseFormatT]:
+        """Parse the final completion object.
+
+        Note this does not provide any guarantees that the stream has actually finished; you must
+        only call this method when the stream is finished.
+        """
+        return parse_chat_completion(
+            chat_completion=self.current_completion_snapshot,
+            response_format=self._rich_response_format,
+            input_tools=self._input_tools,
+        )
+
+    @property
+    def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
+        assert self.__current_completion_snapshot is not None
+        return self.__current_completion_snapshot
+
+    def handle_chunk(self, chunk: ChatCompletionChunk) -> Iterable[ChatCompletionStreamEvent[ResponseFormatT]]:
+        """Accumulates a new chunk into the snapshot and returns an iterable of events to yield."""
+        self.__current_completion_snapshot = self._accumulate_chunk(chunk)
+
+        return self._build_events(
+            chunk=chunk,
+            completion_snapshot=self.__current_completion_snapshot,
+        )
+
+    def _get_choice_state(self, choice: ChoiceChunk) -> ChoiceEventState:
+        try:
+            return self.__choice_event_states[choice.index]
+        except IndexError:
+            choice_state = ChoiceEventState(input_tools=self._input_tools)
+            self.__choice_event_states.append(choice_state)
+            return choice_state
+
+    def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionSnapshot:
+        completion_snapshot = self.__current_completion_snapshot
+
+        if completion_snapshot is None:
+            return _convert_initial_chunk_into_snapshot(chunk)
+
+        for choice in chunk.choices:
+            try:
+                choice_snapshot = completion_snapshot.choices[choice.index]
+                previous_tool_calls = choice_snapshot.message.tool_calls or []
+
+                choice_snapshot.message = cast(
+                    ParsedChatCompletionMessageSnapshot,
+                    construct_type(
+                        type_=ParsedChatCompletionMessageSnapshot,
+                        value=accumulate_delta(
+                            cast(
+                                "dict[object, object]",
+                                model_dump(
+                                    choice_snapshot.message,
+                                    # we don't want to serialise / deserialise our custom properties
+                                    # as they won't appear in the delta and we don't want to have to
+                                    # continuously reparse the content
+                                    exclude=cast(
+                                        # cast required as mypy isn't smart enough to infer `True` here to `Literal[True]`
+                                        IncEx,
+                                        {
+                                            "parsed": True,
+                                            "tool_calls": {
+                                                idx: {"function": {"parsed_arguments": True}}
+                                                for idx, _ in enumerate(choice_snapshot.message.tool_calls or [])
+                                            },
+                                        },
+                                    ),
+                                ),
+                            ),
+                            cast("dict[object, object]", choice.delta.to_dict()),
+                        ),
+                    ),
+                )
+
+                # ensure tools that have already been parsed are added back into the newly
+                # constructed message
snapshot + for tool_index, prev_tool in enumerate(previous_tool_calls): + new_tool = (choice_snapshot.message.tool_calls or [])[tool_index] + + if prev_tool.type == "function": + assert new_tool.type == "function" + new_tool.function.parsed_arguments = prev_tool.function.parsed_arguments + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(prev_tool) + except IndexError: + choice_snapshot = cast( + ParsedChoiceSnapshot, + construct_type( + type_=ParsedChoiceSnapshot, + value={ + **choice.model_dump(exclude_unset=True, exclude={"delta"}), + "message": choice.delta.to_dict(), + }, + ), + ) + completion_snapshot.choices.append(choice_snapshot) + + if choice.finish_reason: + choice_snapshot.finish_reason = choice.finish_reason + + if has_parseable_input(response_format=self._response_format, input_tools=self._input_tools): + if choice.finish_reason == "length": + # at the time of writing, `.usage` will always be `None` but + # we include it here in case that is changed in the future + raise LengthFinishReasonError(completion=completion_snapshot) + + if choice.finish_reason == "content_filter": + raise ContentFilterFinishReasonError() + + if ( + choice_snapshot.message.content + and not choice_snapshot.message.refusal + and is_given(self._rich_response_format) + # partial parsing fails on white-space + and choice_snapshot.message.content.lstrip() + ): + choice_snapshot.message.parsed = from_json( + bytes(choice_snapshot.message.content, "utf-8"), + partial_mode=True, + ) + + for tool_call_chunk in choice.delta.tool_calls or []: + tool_call_snapshot = (choice_snapshot.message.tool_calls or [])[tool_call_chunk.index] + + if tool_call_snapshot.type == "function": + input_tool = get_input_tool_by_name( + input_tools=self._input_tools, name=tool_call_snapshot.function.name + ) + + if ( + input_tool + and input_tool.get("function", {}).get("strict") + and tool_call_snapshot.function.arguments + ): + tool_call_snapshot.function.parsed_arguments = from_json( + bytes(tool_call_snapshot.function.arguments, "utf-8"), + partial_mode=True, + ) + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(tool_call_snapshot) + + if choice.logprobs is not None: + if choice_snapshot.logprobs is None: + choice_snapshot.logprobs = build( + ChoiceLogprobs, + content=choice.logprobs.content, + refusal=choice.logprobs.refusal, + ) + else: + if choice.logprobs.content: + if choice_snapshot.logprobs.content is None: + choice_snapshot.logprobs.content = [] + + choice_snapshot.logprobs.content.extend(choice.logprobs.content) + + if choice.logprobs.refusal: + if choice_snapshot.logprobs.refusal is None: + choice_snapshot.logprobs.refusal = [] + + choice_snapshot.logprobs.refusal.extend(choice.logprobs.refusal) + + completion_snapshot.usage = chunk.usage + completion_snapshot.system_fingerprint = chunk.system_fingerprint + + return completion_snapshot + + def _build_events( + self, + *, + chunk: ChatCompletionChunk, + completion_snapshot: ParsedChatCompletionSnapshot, + ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]: + events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = [] + + events_to_fire.append( + build(ChunkEvent, type="chunk", chunk=chunk, snapshot=completion_snapshot), + ) + + for choice in chunk.choices: + choice_state = self._get_choice_state(choice) + choice_snapshot = completion_snapshot.choices[choice.index] + + if choice.delta.content is not None and choice_snapshot.message.content is not None: + events_to_fire.append( + build( + ContentDeltaEvent, + type="content.delta", 
+ delta=choice.delta.content, + snapshot=choice_snapshot.message.content, + parsed=choice_snapshot.message.parsed, + ) + ) + + if choice.delta.refusal is not None and choice_snapshot.message.refusal is not None: + events_to_fire.append( + build( + RefusalDeltaEvent, + type="refusal.delta", + delta=choice.delta.refusal, + snapshot=choice_snapshot.message.refusal, + ) + ) + + if choice.delta.tool_calls: + tool_calls = choice_snapshot.message.tool_calls + assert tool_calls is not None + + for tool_call_delta in choice.delta.tool_calls: + tool_call = tool_calls[tool_call_delta.index] + + if tool_call.type == "function": + assert tool_call_delta.function is not None + events_to_fire.append( + build( + FunctionToolCallArgumentsDeltaEvent, + type="tool_calls.function.arguments.delta", + name=tool_call.function.name, + index=tool_call_delta.index, + arguments=tool_call.function.arguments, + parsed_arguments=tool_call.function.parsed_arguments, + arguments_delta=tool_call_delta.function.arguments or "", + ) + ) + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(tool_call) + + if choice.logprobs is not None and choice_snapshot.logprobs is not None: + if choice.logprobs.content and choice_snapshot.logprobs.content: + events_to_fire.append( + build( + LogprobsContentDeltaEvent, + type="logprobs.content.delta", + content=choice.logprobs.content, + snapshot=choice_snapshot.logprobs.content, + ), + ) + + if choice.logprobs.refusal and choice_snapshot.logprobs.refusal: + events_to_fire.append( + build( + LogprobsRefusalDeltaEvent, + type="logprobs.refusal.delta", + refusal=choice.logprobs.refusal, + snapshot=choice_snapshot.logprobs.refusal, + ), + ) + + events_to_fire.extend( + choice_state.get_done_events( + choice_chunk=choice, + choice_snapshot=choice_snapshot, + response_format=self._response_format, + ) + ) + + return events_to_fire + + +class ChoiceEventState: + def __init__(self, *, input_tools: list[ChatCompletionToolParam]) -> None: + self._input_tools = input_tools + + self._content_done = False + self._refusal_done = False + self._logprobs_content_done = False + self._logprobs_refusal_done = False + self._done_tool_calls: set[int] = set() + self.__current_tool_call_index: int | None = None + + def get_done_events( + self, + *, + choice_chunk: ChoiceChunk, + choice_snapshot: ParsedChoiceSnapshot, + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]: + events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = [] + + if choice_snapshot.finish_reason: + events_to_fire.extend( + self._content_done_events(choice_snapshot=choice_snapshot, response_format=response_format) + ) + + if ( + self.__current_tool_call_index is not None + and self.__current_tool_call_index not in self._done_tool_calls + ): + self._add_tool_done_event( + events_to_fire=events_to_fire, + choice_snapshot=choice_snapshot, + tool_index=self.__current_tool_call_index, + ) + + for tool_call in choice_chunk.delta.tool_calls or []: + if self.__current_tool_call_index != tool_call.index: + events_to_fire.extend( + self._content_done_events(choice_snapshot=choice_snapshot, response_format=response_format) + ) + + if self.__current_tool_call_index is not None: + self._add_tool_done_event( + events_to_fire=events_to_fire, + choice_snapshot=choice_snapshot, + tool_index=self.__current_tool_call_index, + ) + + self.__current_tool_call_index = tool_call.index + + return events_to_fire + + def _content_done_events( + self, + *, + 
choice_snapshot: ParsedChoiceSnapshot, + response_format: type[ResponseFormatT] | ResponseFormatParam | NotGiven, + ) -> list[ChatCompletionStreamEvent[ResponseFormatT]]: + events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]] = [] + + if choice_snapshot.message.content and not self._content_done: + self._content_done = True + + parsed = maybe_parse_content( + response_format=response_format, + message=choice_snapshot.message, + ) + + # update the parsed content to now use the richer `response_format` + # as opposed to the raw JSON-parsed object as the content is now + # complete and can be fully validated. + choice_snapshot.message.parsed = parsed + + events_to_fire.append( + build( + # we do this dance so that when the `ContentDoneEvent` instance + # is printed at runtime the class name will include the solved + # type variable, e.g. `ContentDoneEvent[MyModelType]` + cast( # pyright: ignore[reportUnnecessaryCast] + "type[ContentDoneEvent[ResponseFormatT]]", + cast(Any, ContentDoneEvent)[solve_response_format_t(response_format)], + ), + type="content.done", + content=choice_snapshot.message.content, + parsed=parsed, + ), + ) + + if choice_snapshot.message.refusal is not None and not self._refusal_done: + self._refusal_done = True + events_to_fire.append( + build(RefusalDoneEvent, type="refusal.done", refusal=choice_snapshot.message.refusal), + ) + + if ( + choice_snapshot.logprobs is not None + and choice_snapshot.logprobs.content is not None + and not self._logprobs_content_done + ): + self._logprobs_content_done = True + events_to_fire.append( + build(LogprobsContentDoneEvent, type="logprobs.content.done", content=choice_snapshot.logprobs.content), + ) + + if ( + choice_snapshot.logprobs is not None + and choice_snapshot.logprobs.refusal is not None + and not self._logprobs_refusal_done + ): + self._logprobs_refusal_done = True + events_to_fire.append( + build(LogprobsRefusalDoneEvent, type="logprobs.refusal.done", refusal=choice_snapshot.logprobs.refusal), + ) + + return events_to_fire + + def _add_tool_done_event( + self, + *, + events_to_fire: list[ChatCompletionStreamEvent[ResponseFormatT]], + choice_snapshot: ParsedChoiceSnapshot, + tool_index: int, + ) -> None: + if tool_index in self._done_tool_calls: + return + + self._done_tool_calls.add(tool_index) + + assert choice_snapshot.message.tool_calls is not None + tool_call_snapshot = choice_snapshot.message.tool_calls[tool_index] + + if tool_call_snapshot.type == "function": + parsed_arguments = parse_function_tool_arguments( + input_tools=self._input_tools, function=tool_call_snapshot.function + ) + + # update the parsed content to potentially use a richer type + # as opposed to the raw JSON-parsed object as the content is now + # complete and can be fully validated. 
+ tool_call_snapshot.function.parsed_arguments = parsed_arguments + + events_to_fire.append( + build( + FunctionToolCallArgumentsDoneEvent, + type="tool_calls.function.arguments.done", + index=tool_index, + name=tool_call_snapshot.function.name, + arguments=tool_call_snapshot.function.arguments, + parsed_arguments=parsed_arguments, + ) + ) + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(tool_call_snapshot) + + +def _convert_initial_chunk_into_snapshot(chunk: ChatCompletionChunk) -> ParsedChatCompletionSnapshot: + data = chunk.to_dict() + choices = cast("list[object]", data["choices"]) + + for choice in chunk.choices: + choices[choice.index] = { + **choice.model_dump(exclude_unset=True, exclude={"delta"}), + "message": choice.delta.to_dict(), + } + + return cast( + ParsedChatCompletionSnapshot, + construct_type( + type_=ParsedChatCompletionSnapshot, + value={ + "system_fingerprint": None, + **data, + "object": "chat.completion", + }, + ), + ) + + +def _is_valid_chat_completion_chunk_weak(sse_event: ChatCompletionChunk) -> bool: + # Although the _raw_stream is always supposed to contain only objects adhering to ChatCompletionChunk schema, + # this is broken by the Azure OpenAI in case of Asynchronous Filter enabled. + # An easy filter is to check for the "object" property: + # - should be "chat.completion.chunk" for a ChatCompletionChunk; + # - is an empty string for Asynchronous Filter events. + return sse_event.object == "chat.completion.chunk" # type: ignore # pylance reports this as a useless check diff --git a/src/openai/lib/streaming/chat/_events.py b/src/openai/lib/streaming/chat/_events.py new file mode 100644 index 0000000000..d4c1f28300 --- /dev/null +++ b/src/openai/lib/streaming/chat/_events.py @@ -0,0 +1,123 @@ +from typing import List, Union, Generic, Optional +from typing_extensions import Literal + +from ._types import ParsedChatCompletionSnapshot +from ...._models import BaseModel, GenericModel +from ..._parsing import ResponseFormatT +from ....types.chat import ChatCompletionChunk, ChatCompletionTokenLogprob + + +class ChunkEvent(BaseModel): + type: Literal["chunk"] + + chunk: ChatCompletionChunk + + snapshot: ParsedChatCompletionSnapshot + + +class ContentDeltaEvent(BaseModel): + """This event is yielded for every chunk with `choice.delta.content` data.""" + + type: Literal["content.delta"] + + delta: str + + snapshot: str + + parsed: Optional[object] = None + + +class ContentDoneEvent(GenericModel, Generic[ResponseFormatT]): + type: Literal["content.done"] + + content: str + + parsed: Optional[ResponseFormatT] = None + + +class RefusalDeltaEvent(BaseModel): + type: Literal["refusal.delta"] + + delta: str + + snapshot: str + + +class RefusalDoneEvent(BaseModel): + type: Literal["refusal.done"] + + refusal: str + + +class FunctionToolCallArgumentsDeltaEvent(BaseModel): + type: Literal["tool_calls.function.arguments.delta"] + + name: str + + index: int + + arguments: str + """Accumulated raw JSON string""" + + parsed_arguments: object + """The parsed arguments so far""" + + arguments_delta: str + """The JSON string delta""" + + +class FunctionToolCallArgumentsDoneEvent(BaseModel): + type: Literal["tool_calls.function.arguments.done"] + + name: str + + index: int + + arguments: str + """Accumulated raw JSON string""" + + parsed_arguments: object + """The parsed arguments""" + + +class LogprobsContentDeltaEvent(BaseModel): + type: Literal["logprobs.content.delta"] + + content: List[ChatCompletionTokenLogprob] + + snapshot: List[ChatCompletionTokenLogprob] + + 
+class LogprobsContentDoneEvent(BaseModel): + type: Literal["logprobs.content.done"] + + content: List[ChatCompletionTokenLogprob] + + +class LogprobsRefusalDeltaEvent(BaseModel): + type: Literal["logprobs.refusal.delta"] + + refusal: List[ChatCompletionTokenLogprob] + + snapshot: List[ChatCompletionTokenLogprob] + + +class LogprobsRefusalDoneEvent(BaseModel): + type: Literal["logprobs.refusal.done"] + + refusal: List[ChatCompletionTokenLogprob] + + +ChatCompletionStreamEvent = Union[ + ChunkEvent, + ContentDeltaEvent, + ContentDoneEvent[ResponseFormatT], + RefusalDeltaEvent, + RefusalDoneEvent, + FunctionToolCallArgumentsDeltaEvent, + FunctionToolCallArgumentsDoneEvent, + LogprobsContentDeltaEvent, + LogprobsContentDoneEvent, + LogprobsRefusalDeltaEvent, + LogprobsRefusalDoneEvent, +] diff --git a/src/openai/lib/streaming/chat/_types.py b/src/openai/lib/streaming/chat/_types.py new file mode 100644 index 0000000000..42552893a0 --- /dev/null +++ b/src/openai/lib/streaming/chat/_types.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing_extensions import TypeAlias + +from ....types.chat import ParsedChoice, ParsedChatCompletion, ParsedChatCompletionMessage + +ParsedChatCompletionSnapshot: TypeAlias = ParsedChatCompletion[object] +"""Snapshot type representing an in-progress accumulation of +a `ParsedChatCompletion` object. +""" + +ParsedChatCompletionMessageSnapshot: TypeAlias = ParsedChatCompletionMessage[object] +"""Snapshot type representing an in-progress accumulation of +a `ParsedChatCompletionMessage` object. + +If the content has been fully accumulated, the `.parsed` content will be +the `response_format` instance, otherwise it'll be the raw JSON parsed version. +""" + +ParsedChoiceSnapshot: TypeAlias = ParsedChoice[object] diff --git a/src/openai/lib/streaming/responses/__init__.py b/src/openai/lib/streaming/responses/__init__.py new file mode 100644 index 0000000000..ff073633bf --- /dev/null +++ b/src/openai/lib/streaming/responses/__init__.py @@ -0,0 +1,13 @@ +from ._events import ( + ResponseTextDoneEvent as ResponseTextDoneEvent, + ResponseTextDeltaEvent as ResponseTextDeltaEvent, + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from ._responses import ( + ResponseStream as ResponseStream, + AsyncResponseStream as AsyncResponseStream, + ResponseStreamEvent as ResponseStreamEvent, + ResponseStreamState as ResponseStreamState, + ResponseStreamManager as ResponseStreamManager, + AsyncResponseStreamManager as AsyncResponseStreamManager, +) diff --git a/src/openai/lib/streaming/responses/_events.py b/src/openai/lib/streaming/responses/_events.py new file mode 100644 index 0000000000..0cdc5992ee --- /dev/null +++ b/src/openai/lib/streaming/responses/_events.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Union, Generic, TypeVar, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._compat import GenericModel +from ....types.responses import ( + ParsedResponse, + ResponseErrorEvent, + ResponseFailedEvent, + ResponseCreatedEvent, + ResponseTextDoneEvent as RawResponseTextDoneEvent, + ResponseAudioDoneEvent, + ResponseCompletedEvent as RawResponseCompletedEvent, + ResponseTextDeltaEvent as RawResponseTextDeltaEvent, + ResponseAudioDeltaEvent, + ResponseIncompleteEvent, + ResponseInProgressEvent, + ResponseRefusalDoneEvent, + ResponseRefusalDeltaEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartDoneEvent, + 
ResponseOutputItemAddedEvent, + ResponseContentPartAddedEvent, + ResponseAudioTranscriptDoneEvent, + ResponseTextAnnotationDeltaEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallSearchingEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallSearchingEvent, + ResponseWebSearchCallInProgressEvent, + ResponseFileSearchCallInProgressEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDoneEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseFunctionCallArgumentsDeltaEvent as RawResponseFunctionCallArgumentsDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, +) + +TextFormatT = TypeVar( + "TextFormatT", + # if it isn't given then we don't do any parsing + default=None, +) + + +class ResponseTextDeltaEvent(RawResponseTextDeltaEvent): + snapshot: str + + +class ResponseTextDoneEvent(RawResponseTextDoneEvent, GenericModel, Generic[TextFormatT]): + parsed: Optional[TextFormatT] = None + + +class ResponseFunctionCallArgumentsDeltaEvent(RawResponseFunctionCallArgumentsDeltaEvent): + snapshot: str + + +class ResponseCompletedEvent(RawResponseCompletedEvent, GenericModel, Generic[TextFormatT]): + response: ParsedResponse[TextFormatT] # type: ignore[assignment] + + +ResponseStreamEvent: TypeAlias = Annotated[ + Union[ + # wrappers with snapshots added on + ResponseTextDeltaEvent, + ResponseTextDoneEvent[TextFormatT], + ResponseFunctionCallArgumentsDeltaEvent, + ResponseCompletedEvent[TextFormatT], + # the same as the non-accumulated API + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseErrorEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseInProgressEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseTextAnnotationDeltaEvent, + ResponseTextDoneEvent, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/lib/streaming/responses/_responses.py b/src/openai/lib/streaming/responses/_responses.py new file mode 100644 index 0000000000..f8f4b64174 --- /dev/null +++ b/src/openai/lib/streaming/responses/_responses.py @@ -0,0 +1,354 @@ +from __future__ import annotations + +import inspect +from types import TracebackType +from typing import Any, List, Generic, Iterable, Awaitable, cast +from typing_extensions import Self, Callable, Iterator, AsyncIterator + +from ._types import 
ParsedResponseSnapshot +from ._events import ( + ResponseStreamEvent, + ResponseTextDoneEvent, + ResponseCompletedEvent, + ResponseTextDeltaEvent, + ResponseFunctionCallArgumentsDeltaEvent, +) +from ...._types import NOT_GIVEN, NotGiven +from ...._utils import is_given, consume_sync_iterator, consume_async_iterator +from ...._models import build, construct_type_unchecked +from ...._streaming import Stream, AsyncStream +from ....types.responses import ParsedResponse, ResponseStreamEvent as RawResponseStreamEvent +from ..._parsing._responses import TextFormatT, parse_text, parse_response +from ....types.responses.tool_param import ToolParam +from ....types.responses.parsed_response import ( + ParsedContent, + ParsedResponseOutputMessage, + ParsedResponseFunctionToolCall, +) + + +class ResponseStream(Generic[TextFormatT]): + def __init__( + self, + *, + raw_stream: Stream[RawResponseStreamEvent], + text_format: type[TextFormatT] | NotGiven, + input_tools: Iterable[ToolParam] | NotGiven, + ) -> None: + self._raw_stream = raw_stream + self._response = raw_stream.response + self._iterator = self.__stream__() + self._state = ResponseStreamState(text_format=text_format, input_tools=input_tools) + + def __next__(self) -> ResponseStreamEvent[TextFormatT]: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[ResponseStreamEvent[TextFormatT]]: + for item in self._iterator: + yield item + + def __enter__(self) -> Self: + return self + + def __stream__(self) -> Iterator[ResponseStreamEvent[TextFormatT]]: + for sse_event in self._raw_stream: + events_to_fire = self._state.handle_event(sse_event) + for event in events_to_fire: + yield event + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self._response.close() + + def get_final_response(self) -> ParsedResponse[TextFormatT]: + """Waits until the stream has been read to completion and returns + the accumulated `ParsedResponse` object. 
+ """ + self.until_done() + response = self._state._completed_response + if not response: + raise RuntimeError("Didn't receive a `response.completed` event.") + + return response + + def until_done(self) -> Self: + """Blocks until the stream has been consumed.""" + consume_sync_iterator(self) + return self + + +class ResponseStreamManager(Generic[TextFormatT]): + def __init__( + self, + api_request: Callable[[], Stream[RawResponseStreamEvent]], + *, + text_format: type[TextFormatT] | NotGiven, + input_tools: Iterable[ToolParam] | NotGiven, + ) -> None: + self.__stream: ResponseStream[TextFormatT] | None = None + self.__api_request = api_request + self.__text_format = text_format + self.__input_tools = input_tools + + def __enter__(self) -> ResponseStream[TextFormatT]: + raw_stream = self.__api_request() + + self.__stream = ResponseStream( + raw_stream=raw_stream, + text_format=self.__text_format, + input_tools=self.__input_tools, + ) + + return self.__stream + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + self.__stream.close() + + +class AsyncResponseStream(Generic[TextFormatT]): + def __init__( + self, + *, + raw_stream: AsyncStream[RawResponseStreamEvent], + text_format: type[TextFormatT] | NotGiven, + input_tools: Iterable[ToolParam] | NotGiven, + ) -> None: + self._raw_stream = raw_stream + self._response = raw_stream.response + self._iterator = self.__stream__() + self._state = ResponseStreamState(text_format=text_format, input_tools=input_tools) + + async def __anext__(self) -> ResponseStreamEvent[TextFormatT]: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[ResponseStreamEvent[TextFormatT]]: + async for item in self._iterator: + yield item + + async def __stream__(self) -> AsyncIterator[ResponseStreamEvent[TextFormatT]]: + async for sse_event in self._raw_stream: + events_to_fire = self._state.handle_event(sse_event) + for event in events_to_fire: + yield event + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + await self._response.aclose() + + async def get_final_response(self) -> ParsedResponse[TextFormatT]: + """Waits until the stream has been read to completion and returns + the accumulated `ParsedResponse` object. 
+ """ + await self.until_done() + response = self._state._completed_response + if not response: + raise RuntimeError("Didn't receive a `response.completed` event.") + + return response + + async def until_done(self) -> Self: + """Blocks until the stream has been consumed.""" + await consume_async_iterator(self) + return self + + +class AsyncResponseStreamManager(Generic[TextFormatT]): + def __init__( + self, + api_request: Awaitable[AsyncStream[RawResponseStreamEvent]], + *, + text_format: type[TextFormatT] | NotGiven, + input_tools: Iterable[ToolParam] | NotGiven, + ) -> None: + self.__stream: AsyncResponseStream[TextFormatT] | None = None + self.__api_request = api_request + self.__text_format = text_format + self.__input_tools = input_tools + + async def __aenter__(self) -> AsyncResponseStream[TextFormatT]: + raw_stream = await self.__api_request + + self.__stream = AsyncResponseStream( + raw_stream=raw_stream, + text_format=self.__text_format, + input_tools=self.__input_tools, + ) + + return self.__stream + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + await self.__stream.close() + + +class ResponseStreamState(Generic[TextFormatT]): + def __init__( + self, + *, + input_tools: Iterable[ToolParam] | NotGiven, + text_format: type[TextFormatT] | NotGiven, + ) -> None: + self.__current_snapshot: ParsedResponseSnapshot | None = None + self._completed_response: ParsedResponse[TextFormatT] | None = None + self._input_tools = [tool for tool in input_tools] if is_given(input_tools) else [] + self._text_format = text_format + self._rich_text_format: type | NotGiven = text_format if inspect.isclass(text_format) else NOT_GIVEN + + def handle_event(self, event: RawResponseStreamEvent) -> List[ResponseStreamEvent[TextFormatT]]: + self.__current_snapshot = snapshot = self.accumulate_event(event) + + events: List[ResponseStreamEvent[TextFormatT]] = [] + + if event.type == "response.output_text.delta": + output = snapshot.output[event.output_index] + assert output.type == "message" + + content = output.content[event.content_index] + assert content.type == "output_text" + + events.append( + build( + ResponseTextDeltaEvent, + content_index=event.content_index, + delta=event.delta, + item_id=event.item_id, + output_index=event.output_index, + type="response.output_text.delta", + snapshot=content.text, + ) + ) + elif event.type == "response.output_text.done": + output = snapshot.output[event.output_index] + assert output.type == "message" + + content = output.content[event.content_index] + assert content.type == "output_text" + + events.append( + build( + ResponseTextDoneEvent[TextFormatT], + content_index=event.content_index, + item_id=event.item_id, + output_index=event.output_index, + type="response.output_text.done", + text=event.text, + parsed=parse_text(event.text, text_format=self._text_format), + ) + ) + elif event.type == "response.function_call_arguments.delta": + output = snapshot.output[event.output_index] + assert output.type == "function_call" + + events.append( + build( + ResponseFunctionCallArgumentsDeltaEvent, + delta=event.delta, + item_id=event.item_id, + output_index=event.output_index, + type="response.function_call_arguments.delta", + snapshot=output.arguments, + ) + ) + + elif event.type == "response.completed": + response = self._completed_response + assert response is not None + + events.append( + build( + ResponseCompletedEvent, + 
type="response.completed", + response=response, + ) + ) + else: + events.append(event) + + return events + + def accumulate_event(self, event: RawResponseStreamEvent) -> ParsedResponseSnapshot: + snapshot = self.__current_snapshot + if snapshot is None: + return self._create_initial_response(event) + + if event.type == "response.output_item.added": + if event.item.type == "function_call": + snapshot.output.append( + construct_type_unchecked( + type_=cast(Any, ParsedResponseFunctionToolCall), value=event.item.to_dict() + ) + ) + elif event.item.type == "message": + snapshot.output.append( + construct_type_unchecked(type_=cast(Any, ParsedResponseOutputMessage), value=event.item.to_dict()) + ) + else: + snapshot.output.append(event.item) + elif event.type == "response.content_part.added": + output = snapshot.output[event.output_index] + if output.type == "message": + output.content.append( + construct_type_unchecked(type_=cast(Any, ParsedContent), value=event.part.to_dict()) + ) + elif event.type == "response.output_text.delta": + output = snapshot.output[event.output_index] + if output.type == "message": + content = output.content[event.content_index] + assert content.type == "output_text" + content.text += event.delta + elif event.type == "response.function_call_arguments.delta": + output = snapshot.output[event.output_index] + if output.type == "function_call": + output.arguments += event.delta + elif event.type == "response.completed": + self._completed_response = parse_response( + text_format=self._text_format, + response=event.response, + input_tools=self._input_tools, + ) + + return snapshot + + def _create_initial_response(self, event: RawResponseStreamEvent) -> ParsedResponseSnapshot: + if event.type != "response.created": + raise RuntimeError(f"Expected to have received `response.created` before `{event.type}`") + + return construct_type_unchecked(type_=ParsedResponseSnapshot, value=event.response.to_dict()) diff --git a/src/openai/lib/streaming/responses/_types.py b/src/openai/lib/streaming/responses/_types.py new file mode 100644 index 0000000000..6d3fd90e40 --- /dev/null +++ b/src/openai/lib/streaming/responses/_types.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from typing_extensions import TypeAlias + +from ....types.responses import ParsedResponse + +ParsedResponseSnapshot: TypeAlias = ParsedResponse[object] +"""Snapshot type representing an in-progress accumulation of +a `ParsedResponse` object. +""" diff --git a/src/openai/pagination.py b/src/openai/pagination.py index 4ec300f2d1..a59cced854 100644 --- a/src/openai/pagination.py +++ b/src/openai/pagination.py @@ -1,31 +1,32 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Any, List, Generic, TypeVar, Optional, cast -from typing_extensions import Literal, Protocol, override, runtime_checkable +from typing_extensions import Protocol, override, runtime_checkable -from ._types import ModelT -from ._models import BaseModel from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage __all__ = ["SyncPage", "AsyncPage", "SyncCursorPage", "AsyncCursorPage"] -_BaseModelT = TypeVar("_BaseModelT", bound=BaseModel) +_T = TypeVar("_T") @runtime_checkable class CursorPageItem(Protocol): - id: str + id: Optional[str] -class SyncPage(BaseSyncPage[ModelT], BasePage[ModelT], Generic[ModelT]): +class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" - data: List[ModelT] - object: Literal["list"] + data: List[_T] + object: str @override - def _get_page_items(self) -> List[ModelT]: - return self.data + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data @override def next_page_info(self) -> None: @@ -36,15 +37,18 @@ def next_page_info(self) -> None: return None -class AsyncPage(BaseAsyncPage[ModelT], BasePage[ModelT], Generic[ModelT]): +class AsyncPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" - data: List[ModelT] - object: Literal["list"] + data: List[_T] + object: str @override - def _get_page_items(self) -> List[ModelT]: - return self.data + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data @override def next_page_info(self) -> None: @@ -55,40 +59,66 @@ def next_page_info(self) -> None: return None -class SyncCursorPage(BaseSyncPage[ModelT], BasePage[ModelT], Generic[ModelT]): - data: List[ModelT] +class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + has_more: Optional[bool] = None @override - def _get_page_items(self) -> List[ModelT]: - return self.data + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() @override def next_page_info(self) -> Optional[PageInfo]: - if not self.data: + data = self.data + if not data: return None - item = cast(Any, self.data[-1]) - if not isinstance(item, CursorPageItem): + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: # TODO emit warning log return None return PageInfo(params={"after": item.id}) -class AsyncCursorPage(BaseAsyncPage[ModelT], BasePage[ModelT], Generic[ModelT]): - data: List[ModelT] +class AsyncCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + has_more: Optional[bool] = None @override - def _get_page_items(self) -> List[ModelT]: - return self.data + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() @override def next_page_info(self) -> Optional[PageInfo]: - if not self.data: + data = self.data + if not data: return None - item = cast(Any, self.data[-1]) - if not isinstance(item, CursorPageItem): + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: # TODO 
emit warning log return None diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py index e0f4f08d5c..8612dec797 100644 --- a/src/openai/resources/__init__.py +++ b/src/openai/resources/__init__.py @@ -1,51 +1,116 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from .beta import Beta, AsyncBeta, BetaWithRawResponse, AsyncBetaWithRawResponse -from .chat import Chat, AsyncChat, ChatWithRawResponse, AsyncChatWithRawResponse -from .audio import Audio, AsyncAudio, AudioWithRawResponse, AsyncAudioWithRawResponse -from .edits import Edits, AsyncEdits, EditsWithRawResponse, AsyncEditsWithRawResponse -from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) +from .evals import ( + Evals, + AsyncEvals, + EvalsWithRawResponse, + AsyncEvalsWithRawResponse, + EvalsWithStreamingResponse, + AsyncEvalsWithStreamingResponse, +) +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) from .images import ( Images, AsyncImages, ImagesWithRawResponse, AsyncImagesWithRawResponse, + ImagesWithStreamingResponse, + AsyncImagesWithStreamingResponse, ) from .models import ( Models, AsyncModels, ModelsWithRawResponse, AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) +from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, ) from .embeddings import ( Embeddings, AsyncEmbeddings, EmbeddingsWithRawResponse, AsyncEmbeddingsWithRawResponse, -) -from .fine_tunes import ( - FineTunes, - AsyncFineTunes, - FineTunesWithRawResponse, - AsyncFineTunesWithRawResponse, + EmbeddingsWithStreamingResponse, + AsyncEmbeddingsWithStreamingResponse, ) from .completions import ( Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, ) from .fine_tuning import ( FineTuning, AsyncFineTuning, FineTuningWithRawResponse, AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, ) from .moderations import ( Moderations, AsyncModerations, ModerationsWithRawResponse, AsyncModerationsWithRawResponse, + ModerationsWithStreamingResponse, + AsyncModerationsWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, ) __all__ = [ @@ -53,48 +118,84 @@ "AsyncCompletions", "CompletionsWithRawResponse", "AsyncCompletionsWithRawResponse", + 
"CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", "Chat", "AsyncChat", "ChatWithRawResponse", "AsyncChatWithRawResponse", - "Edits", - "AsyncEdits", - "EditsWithRawResponse", - "AsyncEditsWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", "Embeddings", "AsyncEmbeddings", "EmbeddingsWithRawResponse", "AsyncEmbeddingsWithRawResponse", + "EmbeddingsWithStreamingResponse", + "AsyncEmbeddingsWithStreamingResponse", "Files", "AsyncFiles", "FilesWithRawResponse", "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", "Images", "AsyncImages", "ImagesWithRawResponse", "AsyncImagesWithRawResponse", + "ImagesWithStreamingResponse", + "AsyncImagesWithStreamingResponse", "Audio", "AsyncAudio", "AudioWithRawResponse", "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", "Moderations", "AsyncModerations", "ModerationsWithRawResponse", "AsyncModerationsWithRawResponse", + "ModerationsWithStreamingResponse", + "AsyncModerationsWithStreamingResponse", "Models", "AsyncModels", "ModelsWithRawResponse", "AsyncModelsWithRawResponse", + "ModelsWithStreamingResponse", + "AsyncModelsWithStreamingResponse", "FineTuning", "AsyncFineTuning", "FineTuningWithRawResponse", "AsyncFineTuningWithRawResponse", - "FineTunes", - "AsyncFineTunes", - "FineTunesWithRawResponse", - "AsyncFineTunesWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", + "Batches", + "AsyncBatches", + "BatchesWithRawResponse", + "AsyncBatchesWithRawResponse", + "BatchesWithStreamingResponse", + "AsyncBatchesWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", + "Evals", + "AsyncEvals", + "EvalsWithRawResponse", + "AsyncEvalsWithRawResponse", + "EvalsWithStreamingResponse", + "AsyncEvalsWithStreamingResponse", ] diff --git a/src/openai/resources/audio/__init__.py b/src/openai/resources/audio/__init__.py index 76547b5f34..7da1d2dbde 100644 --- a/src/openai/resources/audio/__init__.py +++ b/src/openai/resources/audio/__init__.py @@ -1,23 +1,36 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from .audio import Audio, AsyncAudio, AudioWithRawResponse, AsyncAudioWithRawResponse +from .audio import ( + Audio, + AsyncAudio, + AudioWithRawResponse, + AsyncAudioWithRawResponse, + AudioWithStreamingResponse, + AsyncAudioWithStreamingResponse, +) from .speech import ( Speech, AsyncSpeech, SpeechWithRawResponse, AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, ) from .translations import ( Translations, AsyncTranslations, TranslationsWithRawResponse, AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, ) from .transcriptions import ( Transcriptions, AsyncTranscriptions, TranscriptionsWithRawResponse, AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, ) __all__ = [ @@ -25,16 +38,24 @@ "AsyncTranscriptions", "TranscriptionsWithRawResponse", "AsyncTranscriptionsWithRawResponse", + "TranscriptionsWithStreamingResponse", + "AsyncTranscriptionsWithStreamingResponse", "Translations", "AsyncTranslations", "TranslationsWithRawResponse", "AsyncTranslationsWithRawResponse", + "TranslationsWithStreamingResponse", + "AsyncTranslationsWithStreamingResponse", "Speech", "AsyncSpeech", "SpeechWithRawResponse", "AsyncSpeechWithRawResponse", + "SpeechWithStreamingResponse", + "AsyncSpeechWithStreamingResponse", "Audio", "AsyncAudio", "AudioWithRawResponse", "AsyncAudioWithRawResponse", + "AudioWithStreamingResponse", + "AsyncAudioWithStreamingResponse", ] diff --git a/src/openai/resources/audio/audio.py b/src/openai/resources/audio/audio.py index 6f7226ee59..383b7073bf 100644 --- a/src/openai/resources/audio/audio.py +++ b/src/openai/resources/audio/audio.py @@ -1,72 +1,166 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import TYPE_CHECKING - from .speech import ( Speech, AsyncSpeech, SpeechWithRawResponse, AsyncSpeechWithRawResponse, + SpeechWithStreamingResponse, + AsyncSpeechWithStreamingResponse, ) +from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from .translations import ( Translations, AsyncTranslations, TranslationsWithRawResponse, AsyncTranslationsWithRawResponse, + TranslationsWithStreamingResponse, + AsyncTranslationsWithStreamingResponse, ) from .transcriptions import ( Transcriptions, AsyncTranscriptions, TranscriptionsWithRawResponse, AsyncTranscriptionsWithRawResponse, + TranscriptionsWithStreamingResponse, + AsyncTranscriptionsWithStreamingResponse, ) -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI - __all__ = ["Audio", "AsyncAudio"] class Audio(SyncAPIResource): - transcriptions: Transcriptions - translations: Translations - speech: Speech - with_raw_response: AudioWithRawResponse + @cached_property + def transcriptions(self) -> Transcriptions: + return Transcriptions(self._client) + + @cached_property + def translations(self) -> Translations: + return Translations(self._client) + + @cached_property + def speech(self) -> Speech: + return Speech(self._client) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.transcriptions = Transcriptions(client) - self.translations = Translations(client) - self.speech = Speech(client) - self.with_raw_response = AudioWithRawResponse(self) + @cached_property + def with_raw_response(self) -> AudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AudioWithStreamingResponse(self) class AsyncAudio(AsyncAPIResource): - transcriptions: AsyncTranscriptions - translations: AsyncTranslations - speech: AsyncSpeech - with_raw_response: AsyncAudioWithRawResponse + @cached_property + def transcriptions(self) -> AsyncTranscriptions: + return AsyncTranscriptions(self._client) + + @cached_property + def translations(self) -> AsyncTranslations: + return AsyncTranslations(self._client) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.transcriptions = AsyncTranscriptions(client) - self.translations = AsyncTranslations(client) - self.speech = AsyncSpeech(client) - self.with_raw_response = AsyncAudioWithRawResponse(self) + @cached_property + def speech(self) -> AsyncSpeech: + return AsyncSpeech(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAudioWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAudioWithStreamingResponse(self) class AudioWithRawResponse: def __init__(self, audio: Audio) -> None: - self.transcriptions = TranscriptionsWithRawResponse(audio.transcriptions) - self.translations = TranslationsWithRawResponse(audio.translations) - self.speech = SpeechWithRawResponse(audio.speech) + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithRawResponse: + return TranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithRawResponse: + return TranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithRawResponse: + return SpeechWithRawResponse(self._audio.speech) class AsyncAudioWithRawResponse: def __init__(self, audio: AsyncAudio) -> None: - self.transcriptions = AsyncTranscriptionsWithRawResponse(audio.transcriptions) - self.translations = AsyncTranslationsWithRawResponse(audio.translations) - self.speech = AsyncSpeechWithRawResponse(audio.speech) + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithRawResponse: + return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithRawResponse: + return AsyncTranslationsWithRawResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithRawResponse: + return AsyncSpeechWithRawResponse(self._audio.speech) + + +class AudioWithStreamingResponse: + def __init__(self, audio: Audio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> TranscriptionsWithStreamingResponse: + return TranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> TranslationsWithStreamingResponse: + return TranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> SpeechWithStreamingResponse: + return SpeechWithStreamingResponse(self._audio.speech) + + +class AsyncAudioWithStreamingResponse: + def __init__(self, audio: AsyncAudio) -> None: + self._audio = audio + + @cached_property + def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse: + return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions) + + @cached_property + def translations(self) -> AsyncTranslationsWithStreamingResponse: + return AsyncTranslationsWithStreamingResponse(self._audio.translations) + + @cached_property + def speech(self) -> AsyncSpeechWithStreamingResponse: + return AsyncSpeechWithStreamingResponse(self._audio.speech) diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py index 458843866f..a195d7135e 100644 --- a/src/openai/resources/audio/speech.py +++ b/src/openai/resources/audio/speech.py @@ -1,39 +1,60 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import TYPE_CHECKING, Union +from typing import Union from typing_extensions import Literal import httpx +from ... import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from ..._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) from ...types.audio import speech_create_params -from ..._base_client import HttpxBinaryResponseContent, make_request_options - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from ..._base_client import make_request_options +from ...types.audio.speech_model import SpeechModel __all__ = ["Speech", "AsyncSpeech"] class Speech(SyncAPIResource): - with_raw_response: SpeechWithRawResponse + @cached_property + def with_raw_response(self) -> SpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SpeechWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = SpeechWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> SpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SpeechWithStreamingResponse(self) def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN, + model: Union[str, SpeechModel], + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ], + instructions: str | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -41,7 +62,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HttpxBinaryResponseContent: + ) -> _legacy_response.HttpxBinaryResponseContent: """ Generates audio from the input text. @@ -49,16 +70,22 @@ def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. + + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and + `verse`. 
Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. - response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`. + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is - the default. + the default. Does not work with `gpt-4o-mini-tts`. extra_headers: Send extra headers @@ -68,6 +95,7 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return self._post( "/audio/speech", body=maybe_transform( @@ -75,6 +103,7 @@ def create( "input": input, "model": model, "voice": voice, + "instructions": instructions, "response_format": response_format, "speed": speed, }, @@ -83,24 +112,40 @@ def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=HttpxBinaryResponseContent, + cast_to=_legacy_response.HttpxBinaryResponseContent, ) class AsyncSpeech(AsyncAPIResource): - with_raw_response: AsyncSpeechWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncSpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSpeechWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncSpeechWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSpeechWithStreamingResponse(self) async def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac"] | NotGiven = NOT_GIVEN, + model: Union[str, SpeechModel], + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ], + instructions: str | NotGiven = NOT_GIVEN, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -108,7 +153,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HttpxBinaryResponseContent: + ) -> _legacy_response.HttpxBinaryResponseContent: """ Generates audio from the input text. 
@@ -116,16 +161,22 @@ async def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. + + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and + `verse`. Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. - response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`. + response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, + `wav`, and `pcm`. speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is - the default. + the default. Does not work with `gpt-4o-mini-tts`. extra_headers: Send extra headers @@ -135,13 +186,15 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})} return await self._post( "/audio/speech", - body=maybe_transform( + body=await async_maybe_transform( { "input": input, "model": model, "voice": voice, + "instructions": instructions, "response_format": response_format, "speed": speed, }, @@ -150,19 +203,43 @@ async def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=HttpxBinaryResponseContent, + cast_to=_legacy_response.HttpxBinaryResponseContent, ) class SpeechWithRawResponse: def __init__(self, speech: Speech) -> None: - self.create = to_raw_response_wrapper( + self._speech = speech + + self.create = _legacy_response.to_raw_response_wrapper( speech.create, ) class AsyncSpeechWithRawResponse: def __init__(self, speech: AsyncSpeech) -> None: - self.create = async_to_raw_response_wrapper( + self._speech = speech + + self.create = _legacy_response.async_to_raw_response_wrapper( + speech.create, + ) + + +class SpeechWithStreamingResponse: + def __init__(self, speech: Speech) -> None: + self._speech = speech + + self.create = to_custom_streamed_response_wrapper( + speech.create, + StreamedBinaryAPIResponse, + ) + + +class AsyncSpeechWithStreamingResponse: + def __init__(self, speech: AsyncSpeech) -> None: + self._speech = speech + + self.create = async_to_custom_streamed_response_wrapper( speech.create, + AsyncStreamedBinaryAPIResponse, ) diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py index d2b4452411..0c7ebca7a6 100644 --- a/src/openai/resources/audio/transcriptions.py +++ b/src/openai/resources/audio/transcriptions.py @@ -1,48 +1,136 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import TYPE_CHECKING, Union, Mapping, cast -from typing_extensions import Literal +import logging +from typing import TYPE_CHECKING, List, Union, Mapping, Optional, cast +from typing_extensions import Literal, overload, assert_never import httpx +from ... import _legacy_response +from ...types import AudioResponseFormat from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import extract_files, maybe_transform, deepcopy_minimal +from ..._utils import extract_files, required_args, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ...types.audio import Transcription, transcription_create_params +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._streaming import Stream, AsyncStream +from ...types.audio import transcription_create_params from ..._base_client import make_request_options - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from ...types.audio_model import AudioModel +from ...types.audio.transcription import Transcription +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.transcription_include import TranscriptionInclude +from ...types.audio.transcription_verbose import TranscriptionVerbose +from ...types.audio.transcription_stream_event import TranscriptionStreamEvent +from ...types.audio.transcription_create_response import TranscriptionCreateResponse __all__ = ["Transcriptions", "AsyncTranscriptions"] +log: logging.Logger = logging.getLogger("openai.audio.transcriptions") + class Transcriptions(SyncAPIResource): - with_raw_response: TranscriptionsWithRawResponse + @cached_property + def with_raw_response(self) -> TranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranscriptionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranscriptionsWithStreamingResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = TranscriptionsWithRawResponse(self) + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: ... + @overload def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["verbose_json"], language: str | NotGiven = NOT_GIVEN, prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + ) -> TranscriptionVerbose: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["text", "srt", "vtt"], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: Literal[True], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[TranscriptionStreamEvent]: """ Transcribes audio into the input language. @@ -51,19 +139,37 @@ def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). 
+ + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -71,6 +177,12 @@ def create( [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -79,57 +191,246 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: bool, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @required_args(["file", "model"], ["file", "model", "stream"]) + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]: body = deepcopy_minimal( { "file": file, "model": model, + "include": include, "language": language, "prompt": prompt, "response_format": response_format, + "stream": stream, "temperature": temperature, + "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - - return self._post( + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( # type: ignore[return-value] "/audio/transcriptions", - body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + body=maybe_transform( + body, + transcription_create_params.TranscriptionCreateParamsStreaming + if stream + else transcription_create_params.TranscriptionCreateParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Transcription, + cast_to=_get_response_format_type(response_format), + stream=stream or False, + stream_cls=Stream[TranscriptionStreamEvent], ) class AsyncTranscriptions(AsyncAPIResource): - with_raw_response: AsyncTranscriptionsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncTranscriptionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncTranscriptionsWithStreamingResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncTranscriptionsWithRawResponse(self) + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["verbose_json"], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionVerbose: ... + @overload async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + response_format: Literal["text", "srt", "vtt"], language: str | NotGiven = NOT_GIVEN, prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + ) -> str: ... 
+ + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: Literal[True], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[TranscriptionStreamEvent]: """ Transcribes audio into the input language. @@ -138,19 +439,37 @@ async def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -158,6 +477,12 @@ async def create( [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. 
+ timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -166,43 +491,196 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: bool, + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source + Whisper V2 model). + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. + + temperature: The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["file", "model"], ["file", "model", "stream"]) + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN, + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]: body = deepcopy_minimal( { "file": file, "model": model, + "include": include, "language": language, "prompt": prompt, "response_format": response_format, + "stream": stream, "temperature": temperature, + "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/audio/transcriptions", - body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + body=await async_maybe_transform( + body, + transcription_create_params.TranscriptionCreateParamsStreaming + if stream + else transcription_create_params.TranscriptionCreateParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Transcription, + cast_to=_get_response_format_type(response_format), + stream=stream or False, + stream_cls=AsyncStream[TranscriptionStreamEvent], ) class TranscriptionsWithRawResponse: def __init__(self, transcriptions: Transcriptions) -> None: - self.create = to_raw_response_wrapper( + self._transcriptions = transcriptions + + self.create = _legacy_response.to_raw_response_wrapper( transcriptions.create, ) class AsyncTranscriptionsWithRawResponse: def __init__(self, transcriptions: AsyncTranscriptions) -> None: - self.create = async_to_raw_response_wrapper( + self._transcriptions = transcriptions + + self.create = _legacy_response.async_to_raw_response_wrapper( + transcriptions.create, + ) + + +class TranscriptionsWithStreamingResponse: + def __init__(self, transcriptions: Transcriptions) -> None: + self._transcriptions = transcriptions + + self.create = to_streamed_response_wrapper( transcriptions.create, ) + + +class AsyncTranscriptionsWithStreamingResponse: + def __init__(self, transcriptions: AsyncTranscriptions) -> None: + self._transcriptions = transcriptions + + self.create = async_to_streamed_response_wrapper( + transcriptions.create, + ) + + +def _get_response_format_type( + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven, +) -> type[Transcription | TranscriptionVerbose | str]: + if isinstance(response_format, NotGiven) or response_format is None: # pyright: ignore[reportUnnecessaryComparison] + return Transcription + + if response_format == "json": + return Transcription + elif response_format == "verbose_json": + return TranscriptionVerbose + elif response_format == "srt" or response_format == "text" or response_format == "vtt": + return str + elif TYPE_CHECKING: # type: ignore[unreachable] + assert_never(response_format) + else: + log.warn("Unexpected audio response format: %s", response_format) + return Transcription diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py index fe7f7f2a40..28b577ce2e 100644 --- a/src/openai/resources/audio/translations.py +++ b/src/openai/resources/audio/translations.py @@ -1,39 +1,109 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +import logging from typing import TYPE_CHECKING, Union, Mapping, cast -from typing_extensions import Literal +from typing_extensions import Literal, overload, assert_never import httpx +from ... 
import _legacy_response from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import extract_files, maybe_transform, deepcopy_minimal +from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ...types.audio import Translation, translation_create_params +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...types.audio import translation_create_params from ..._base_client import make_request_options - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from ...types.audio_model import AudioModel +from ...types.audio.translation import Translation +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.translation_verbose import TranslationVerbose __all__ = ["Translations", "AsyncTranslations"] +log: logging.Logger = logging.getLogger("openai.audio.transcriptions") + class Translations(SyncAPIResource): - with_raw_response: TranslationsWithRawResponse + @cached_property + def with_raw_response(self) -> TranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranslationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranslationsWithStreamingResponse(self) + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Translation: ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["verbose_json"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranslationVerbose: ... 
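The overloads above make the return type track `response_format`; a brief illustrative sketch (not part of the patch), assuming a local `speech.mp3` file exists:

from openai import OpenAI

client = OpenAI()

with open("speech.mp3", "rb") as audio_file:  # assumed local file
    verbose = client.audio.translations.create(
        file=audio_file,
        model="whisper-1",
        response_format="verbose_json",
    )
# Resolved as TranslationVerbose, so verbose-only fields such as duration are typed.
print(verbose.duration)

with open("speech.mp3", "rb") as audio_file:
    subtitles = client.audio.translations.create(
        file=audio_file,
        model="whisper-1",
        response_format="srt",
    )
# "text", "srt" and "vtt" resolve to a plain str.
print(subtitles)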
- def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = TranslationsWithRawResponse(self) + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["text", "srt", "vtt"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, + response_format: Union[Literal["json", "text", "srt", "verbose_json", "vtt"], NotGiven] = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -41,7 +111,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + ) -> Translation | TranslationVerbose | str: """ Translates audio into English. @@ -49,15 +119,16 @@ def create( file: The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -83,37 +154,49 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - - return self._post( + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( # type: ignore[return-value] "/audio/translations", body=maybe_transform(body, translation_create_params.TranslationCreateParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Translation, + cast_to=_get_response_format_type(response_format), ) class AsyncTranslations(AsyncAPIResource): - with_raw_response: AsyncTranslationsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncTranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncTranslationsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncTranslationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncTranslationsWithStreamingResponse(self) + + @overload async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], + response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN, prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -121,7 +204,57 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + ) -> Translation: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["verbose_json"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranslationVerbose: ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + response_format: Literal["text", "srt", "vtt"], + prompt: str | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: ... 
+ + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + prompt: str | NotGiven = NOT_GIVEN, + response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Translation | TranslationVerbose | str: """ Translates audio into English. @@ -129,15 +262,16 @@ async def create( file: The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` is currently available. + model: ID of the model to use. Only `whisper-1` (which is powered by our open source + Whisper V2 model) is currently available. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -163,32 +297,71 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
         return await self._post(
             "/audio/translations",
-            body=maybe_transform(body, translation_create_params.TranslationCreateParams),
+            body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams),
             files=files,
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=Translation,
+            cast_to=_get_response_format_type(response_format),
         )


 class TranslationsWithRawResponse:
     def __init__(self, translations: Translations) -> None:
-        self.create = to_raw_response_wrapper(
+        self._translations = translations
+
+        self.create = _legacy_response.to_raw_response_wrapper(
             translations.create,
         )


 class AsyncTranslationsWithRawResponse:
     def __init__(self, translations: AsyncTranslations) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self._translations = translations
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class TranslationsWithStreamingResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithStreamingResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = async_to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+def _get_response_format_type(
+    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
+) -> type[Translation | TranslationVerbose | str]:
+    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
+        return Translation
+
+    if response_format == "json":
+        return Translation
+    elif response_format == "verbose_json":
+        return TranslationVerbose
+    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
+        return str
+    elif TYPE_CHECKING:  # type: ignore[unreachable]
+        assert_never(response_format)
+    else:
+        log.warning("Unexpected audio response format: %s", response_format)
+        return Translation
diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py
new file mode 100644
index 0000000000..26ea498b31
--- /dev/null
+++ b/src/openai/resources/batches.py
@@ -0,0 +1,514 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..
import _legacy_response +from ..types import batch_list_params, batch_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncCursorPage, AsyncCursorPage +from ..types.batch import Batch +from .._base_client import AsyncPaginator, make_request_options +from ..types.shared_params.metadata import Metadata + +__all__ = ["Batches", "AsyncBatches"] + + +class Batches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return BatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return BatchesWithStreamingResponse(self) + + def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
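To make the `input_file_id`, `endpoint`, and `completion_window` contract above concrete, a short sketch (illustrative only; the `requests.jsonl` file name and metadata values are assumptions):

from openai import OpenAI

client = OpenAI()

# Upload the JSONL request file with purpose="batch", then create the batch.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"job": "nightly-eval"},
)
print(batch.id, batch.status)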
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class AsyncBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncBatchesWithStreamingResponse(self) + + async def create( + self, + *, + completion_window: Literal["24h"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + input_file_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Creates and executes a batch from an uploaded file of requests + + Args: + completion_window: The time frame within which the batch should be processed. Currently only `24h` + is supported. + + endpoint: The endpoint to be used for all requests in the batch. Currently + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. + + input_file_id: The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/batches", + body=await async_maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + async def retrieve( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._get( + f"/batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. 
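A brief sketch of how the cursor pagination described above is typically consumed, together with `retrieve` and `cancel` (illustrative only; the batch id is a placeholder):

from openai import OpenAI

client = OpenAI()

# Iterating the page object auto-paginates, following the `after` cursor internally.
for batch in client.batches.list(limit=20):
    print(batch.id, batch.status)

# Retrieve a specific batch, and cancel it if it is still running.
batch = client.batches.retrieve("batch_abc123")
if batch.status == "in_progress":
    client.batches.cancel(batch.id)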
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=AsyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + async def cancel( + self, + batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return await self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Batch, + ) + + +class BatchesWithRawResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = _legacy_response.to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithRawResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + batches.cancel, + ) + + +class BatchesWithStreamingResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = to_streamed_response_wrapper( + batches.list, + ) + self.cancel = to_streamed_response_wrapper( + batches.cancel, + ) + + +class AsyncBatchesWithStreamingResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = async_to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + batches.list, + ) + self.cancel = async_to_streamed_response_wrapper( + 
batches.cancel, + ) diff --git a/src/openai/resources/beta/__init__.py b/src/openai/resources/beta/__init__.py index 55ad243cca..87fea25267 100644 --- a/src/openai/resources/beta/__init__.py +++ b/src/openai/resources/beta/__init__.py @@ -1,17 +1,28 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from .beta import Beta, AsyncBeta, BetaWithRawResponse, AsyncBetaWithRawResponse +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) from .threads import ( Threads, AsyncThreads, ThreadsWithRawResponse, AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, ) from .assistants import ( Assistants, AsyncAssistants, AssistantsWithRawResponse, AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, ) __all__ = [ @@ -19,12 +30,18 @@ "AsyncAssistants", "AssistantsWithRawResponse", "AsyncAssistantsWithRawResponse", + "AssistantsWithStreamingResponse", + "AsyncAssistantsWithStreamingResponse", "Threads", "AsyncThreads", "ThreadsWithRawResponse", "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", ] diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py new file mode 100644 index 0000000000..9059d93616 --- /dev/null +++ b/src/openai/resources/beta/assistants.py @@ -0,0 +1,1013 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ...types.beta import ( + assistant_list_params, + assistant_create_params, + assistant_update_params, +) +from ..._base_client import AsyncPaginator, make_request_options +from ...types.beta.assistant import Assistant +from ...types.shared.chat_model import ChatModel +from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.shared_params.metadata import Metadata +from ...types.shared.reasoning_effort import ReasoningEffort +from ...types.beta.assistant_tool_param import AssistantToolParam +from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam + +__all__ = ["Assistants", "AsyncAssistants"] + + +class Assistants(SyncAPIResource): + @cached_property + def with_raw_response(self) -> AssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
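For context on the `with_raw_response` and `with_streaming_response` accessors added throughout this patch, a hedged usage sketch (the header names and assistant parameters are assumptions for illustration):

from openai import OpenAI

client = OpenAI()

# Raw response: inspect headers, then recover the parsed model via .parse().
response = client.beta.assistants.with_raw_response.create(
    model="gpt-4o",
    name="Example assistant",
)
print(response.headers.get("x-request-id"))
assistant = response.parse()
print(assistant.id)

# Streaming response: the body is not read until you ask for it.
with client.beta.assistants.with_streaming_response.retrieve(assistant.id) as streamed:
    print(streamed.headers.get("content-type"))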
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AssistantsWithStreamingResponse(self) + + def create( + self, + *, + model: Union[str, ChatModel], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + description: The description of the assistant. The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/assistants", + body=maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + name: The name of the assistant. The maximum length is 256 characters. 
+ + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
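The update, list, and delete methods defined in this section can be exercised roughly as follows (a sketch, with a placeholder assistant id and illustrative field values):

from openai import OpenAI

client = OpenAI()

updated = client.beta.assistants.update(
    "asst_abc123",
    name="Renamed assistant",
    metadata={"team": "docs"},
)

# `order` and `limit` map to the list query parameters documented above.
for assistant in client.beta.assistants.list(order="desc", limit=10):
    print(assistant.id, assistant.name)

deleted = client.beta.assistants.delete("asst_abc123")
print(deleted.deleted)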
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/assistants/{assistant_id}", + body=maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Assistant]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=SyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=AssistantDeleted, + ) + + +class AsyncAssistants(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncAssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAssistantsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAssistantsWithStreamingResponse(self) + + async def create( + self, + *, + model: Union[str, ChatModel], + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Create an assistant with a model and instructions. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + description: The description of the assistant. The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
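A hedged sketch of the async create call described here, assuming an o-series model such as `o3-mini` and an API key configured in the environment:

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # reasoning_effort only applies to o-series reasoning models.
    assistant = await client.beta.assistants.create(
        model="o3-mini",
        name="Math Tutor",
        instructions="You are a personal math tutor.",
        reasoning_effort="low",
    )
    print(assistant.id)


asyncio.run(main())
```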
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/assistants", + body=await async_maybe_transform( + { + "model": model, + "description": description, + "instructions": instructions, + "metadata": metadata, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_create_params.AssistantCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + async def retrieve( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """ + Retrieves an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + async def update( + self, + assistant_id: str, + *, + description: Optional[str] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Iterable[AssistantToolParam] | 
NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Assistant: + """Modifies an assistant. + + Args: + description: The description of the assistant. + + The maximum length is 512 characters. + + instructions: The system instructions that the assistant uses. The maximum length is 256,000 + characters. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + name: The name of the assistant. The maximum length is 256 characters. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per + assistant. 
Tools can be of types `code_interpreter`, `file_search`, or + `function`. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/assistants/{assistant_id}", + body=await async_maybe_transform( + { + "description": description, + "instructions": instructions, + "metadata": metadata, + "model": model, + "name": name, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "temperature": temperature, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + }, + assistant_update_params.AssistantUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Assistant, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]: + """Returns a list of assistants. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
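A small usage sketch for the cursor parameters documented here: the returned paginator can follow the `after` cursor automatically when iterated, so explicit cursor handling is usually unnecessary (assumes an API key in the environment):

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # Iterating the paginator fetches further pages via the `after` cursor as needed.
    async for assistant in client.beta.assistants.list(limit=20, order="desc"):
        print(assistant.id, assistant.name)


asyncio.run(main())
```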
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/assistants", + page=AsyncCursorPage[Assistant], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + assistant_list_params.AssistantListParams, + ), + ), + model=Assistant, + ) + + async def delete( + self, + assistant_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantDeleted: + """ + Delete an assistant. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not assistant_id: + raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/assistants/{assistant_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=AssistantDeleted, + ) + + +class AssistantsWithRawResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + assistants.delete, + ) + + +class AsyncAssistantsWithRawResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = _legacy_response.async_to_raw_response_wrapper( + assistants.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + assistants.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + assistants.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + assistants.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + assistants.delete, + ) + + +class AssistantsWithStreamingResponse: + def __init__(self, assistants: Assistants) -> None: + self._assistants = assistants + + self.create = to_streamed_response_wrapper( + assistants.create, + ) + self.retrieve = to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = to_streamed_response_wrapper( + assistants.update, + ) + self.list = to_streamed_response_wrapper( + assistants.list, + ) + self.delete = to_streamed_response_wrapper( + assistants.delete, + ) + + +class 
AsyncAssistantsWithStreamingResponse: + def __init__(self, assistants: AsyncAssistants) -> None: + self._assistants = assistants + + self.create = async_to_streamed_response_wrapper( + assistants.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + assistants.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + assistants.update, + ) + self.list = async_to_streamed_response_wrapper( + assistants.list, + ) + self.delete = async_to_streamed_response_wrapper( + assistants.delete, + ) diff --git a/src/openai/resources/beta/assistants/__init__.py b/src/openai/resources/beta/assistants/__init__.py deleted file mode 100644 index 6efb0b21ec..0000000000 --- a/src/openai/resources/beta/assistants/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse -from .assistants import ( - Assistants, - AsyncAssistants, - AssistantsWithRawResponse, - AsyncAssistantsWithRawResponse, -) - -__all__ = [ - "Files", - "AsyncFiles", - "FilesWithRawResponse", - "AsyncFilesWithRawResponse", - "Assistants", - "AsyncAssistants", - "AssistantsWithRawResponse", - "AsyncAssistantsWithRawResponse", -] diff --git a/src/openai/resources/beta/assistants/files.py b/src/openai/resources/beta/assistants/files.py deleted file mode 100644 index 5ac5897ca3..0000000000 --- a/src/openai/resources/beta/assistants/files.py +++ /dev/null @@ -1,416 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import TYPE_CHECKING -from typing_extensions import Literal - -import httpx - -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import AsyncPaginator, make_request_options -from ....types.beta.assistants import ( - AssistantFile, - FileDeleteResponse, - file_list_params, - file_create_params, -) - -if TYPE_CHECKING: - from ...._client import OpenAI, AsyncOpenAI - -__all__ = ["Files", "AsyncFiles"] - - -class Files(SyncAPIResource): - with_raw_response: FilesWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = FilesWithRawResponse(self) - - def create( - self, - assistant_id: str, - *, - file_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: - """ - Create an assistant file by attaching a - [File](https://platform.openai.com/docs/api-reference/files) to an - [assistant](https://platform.openai.com/docs/api-reference/assistants). - - Args: - file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._post( - f"/assistants/{assistant_id}/files", - body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AssistantFile, - ) - - def retrieve( - self, - file_id: str, - *, - assistant_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: - """ - Retrieves an AssistantFile. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get( - f"/assistants/{assistant_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AssistantFile, - ) - - def list( - self, - assistant_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[AssistantFile]: - """ - Returns a list of assistant files. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/assistants/{assistant_id}/files", - page=SyncCursorPage[AssistantFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=AssistantFile, - ) - - def delete( - self, - file_id: str, - *, - assistant_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FileDeleteResponse: - """ - Delete an assistant file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._delete( - f"/assistants/{assistant_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FileDeleteResponse, - ) - - -class AsyncFiles(AsyncAPIResource): - with_raw_response: AsyncFilesWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncFilesWithRawResponse(self) - - async def create( - self, - assistant_id: str, - *, - file_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: - """ - Create an assistant file by attaching a - [File](https://platform.openai.com/docs/api-reference/files) to an - [assistant](https://platform.openai.com/docs/api-reference/assistants). - - Args: - file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/assistants/{assistant_id}/files", - body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AssistantFile, - ) - - async def retrieve( - self, - file_id: str, - *, - assistant_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantFile: - """ - Retrieves an AssistantFile. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/assistants/{assistant_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AssistantFile, - ) - - def list( - self, - assistant_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[AssistantFile, AsyncCursorPage[AssistantFile]]: - """ - Returns a list of assistant files. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/assistants/{assistant_id}/files", - page=AsyncCursorPage[AssistantFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=AssistantFile, - ) - - async def delete( - self, - file_id: str, - *, - assistant_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FileDeleteResponse: - """ - Delete an assistant file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._delete( - f"/assistants/{assistant_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FileDeleteResponse, - ) - - -class FilesWithRawResponse: - def __init__(self, files: Files) -> None: - self.create = to_raw_response_wrapper( - files.create, - ) - self.retrieve = to_raw_response_wrapper( - files.retrieve, - ) - self.list = to_raw_response_wrapper( - files.list, - ) - self.delete = to_raw_response_wrapper( - files.delete, - ) - - -class AsyncFilesWithRawResponse: - def __init__(self, files: AsyncFiles) -> None: - self.create = async_to_raw_response_wrapper( - files.create, - ) - self.retrieve = async_to_raw_response_wrapper( - files.retrieve, - ) - self.list = async_to_raw_response_wrapper( - files.list, - ) - self.delete = async_to_raw_response_wrapper( - files.delete, - ) diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index b552561763..62fc8258b9 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -1,60 +1,175 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import TYPE_CHECKING - -from .threads import ( - Threads, - AsyncThreads, - ThreadsWithRawResponse, - AsyncThreadsWithRawResponse, -) +from ..._compat import cached_property +from .chat.chat import Chat, AsyncChat from .assistants import ( Assistants, AsyncAssistants, AssistantsWithRawResponse, AsyncAssistantsWithRawResponse, + AssistantsWithStreamingResponse, + AsyncAssistantsWithStreamingResponse, ) from ..._resource import SyncAPIResource, AsyncAPIResource - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from .threads.threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .realtime.realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) __all__ = ["Beta", "AsyncBeta"] class Beta(SyncAPIResource): - assistants: Assistants - threads: Threads - with_raw_response: BetaWithRawResponse + @cached_property + def chat(self) -> Chat: + return Chat(self._client) + + @cached_property + def realtime(self) -> Realtime: + return Realtime(self._client) + + @cached_property + def assistants(self) -> Assistants: + return Assistants(self._client) + + @cached_property + def threads(self) -> Threads: + return Threads(self._client) + + @cached_property + def with_raw_response(self) -> BetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.assistants = Assistants(client) - self.threads = Threads(client) - self.with_raw_response = BetaWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return BetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return BetaWithStreamingResponse(self) class AsyncBeta(AsyncAPIResource): - assistants: AsyncAssistants - threads: AsyncThreads - with_raw_response: AsyncBetaWithRawResponse + @cached_property + def chat(self) -> AsyncChat: + return AsyncChat(self._client) + + @cached_property + def realtime(self) -> AsyncRealtime: + return AsyncRealtime(self._client) + + @cached_property + def assistants(self) -> AsyncAssistants: + return AsyncAssistants(self._client) + + @cached_property + def threads(self) -> AsyncThreads: + return AsyncThreads(self._client) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.assistants = AsyncAssistants(client) - self.threads = AsyncThreads(client) - self.with_raw_response = AsyncBetaWithRawResponse(self) + @cached_property + def with_raw_response(self) -> AsyncBetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncBetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncBetaWithStreamingResponse(self) class BetaWithRawResponse: def __init__(self, beta: Beta) -> None: - self.assistants = AssistantsWithRawResponse(beta.assistants) - self.threads = ThreadsWithRawResponse(beta.threads) + self._beta = beta + + @cached_property + def realtime(self) -> RealtimeWithRawResponse: + return RealtimeWithRawResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AssistantsWithRawResponse: + return AssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithRawResponse: + return ThreadsWithRawResponse(self._beta.threads) class AsyncBetaWithRawResponse: def __init__(self, beta: AsyncBeta) -> None: - self.assistants = AsyncAssistantsWithRawResponse(beta.assistants) - self.threads = AsyncThreadsWithRawResponse(beta.threads) + self._beta = beta + + @cached_property + def realtime(self) -> AsyncRealtimeWithRawResponse: + return AsyncRealtimeWithRawResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AsyncAssistantsWithRawResponse: + return AsyncAssistantsWithRawResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithRawResponse: + return AsyncThreadsWithRawResponse(self._beta.threads) + + +class BetaWithStreamingResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> RealtimeWithStreamingResponse: + return RealtimeWithStreamingResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AssistantsWithStreamingResponse: + return AssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> ThreadsWithStreamingResponse: + return ThreadsWithStreamingResponse(self._beta.threads) + + +class AsyncBetaWithStreamingResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def realtime(self) -> AsyncRealtimeWithStreamingResponse: + return AsyncRealtimeWithStreamingResponse(self._beta.realtime) + + @cached_property + def assistants(self) -> AsyncAssistantsWithStreamingResponse: + return AsyncAssistantsWithStreamingResponse(self._beta.assistants) + + @cached_property + def threads(self) -> AsyncThreadsWithStreamingResponse: + return AsyncThreadsWithStreamingResponse(self._beta.threads) diff --git a/src/openai/resources/beta/chat/__init__.py b/src/openai/resources/beta/chat/__init__.py new file mode 100644 index 0000000000..072d7867a5 --- /dev/null +++ b/src/openai/resources/beta/chat/__init__.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat import Chat, AsyncChat +from .completions import Completions, AsyncCompletions + +__all__ = [ + "Completions", + "AsyncCompletions", + "Chat", + "AsyncChat", +] diff --git a/src/openai/resources/beta/chat/chat.py b/src/openai/resources/beta/chat/chat.py new file mode 100644 index 0000000000..6afdcea381 --- /dev/null +++ b/src/openai/resources/beta/chat/chat.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ...._compat import cached_property +from .completions import Completions, AsyncCompletions +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Chat", "AsyncChat"] + + +class Chat(SyncAPIResource): + @cached_property + def completions(self) -> Completions: + return Completions(self._client) + + +class AsyncChat(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletions: + return AsyncCompletions(self._client) diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py new file mode 100644 index 0000000000..80e015615f --- /dev/null +++ b/src/openai/resources/beta/chat/completions.py @@ -0,0 +1,634 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Type, Union, Iterable, Optional, cast +from functools import partial +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream +from ....types.chat import completion_create_params +from ...._base_client import make_request_options +from ....lib._parsing import ( + ResponseFormatT, + validate_input_tools as _validate_input_tools, + parse_chat_completion as _parse_chat_completion, + type_to_response_format_param as _type_to_response_format, +) +from ....types.chat_model import ChatModel +from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager +from ....types.shared_params import Metadata, ReasoningEffort +from ....types.chat.chat_completion import ChatCompletion +from ....types.chat.chat_completion_chunk import ChatCompletionChunk +from ....types.chat.parsed_chat_completion import ParsedChatCompletion +from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + def parse( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedChatCompletion[ResponseFormatT]: + """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types + & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class. + + You can pass a pydantic model to this method and it will automatically convert the model + into a JSON schema, send it to the API and parse the response content back into the given model. 
+ + This method will also automatically parse `function` tool calls if: + - You use the `openai.pydantic_function_tool()` helper method + - You mark your tool schema with `"strict": True` + + Example usage: + ```py + from pydantic import BaseModel + from openai import OpenAI + + + class Step(BaseModel): + explanation: str + output: str + + + class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + + client = OpenAI() + completion = client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "You are a helpful math tutor."}, + {"role": "user", "content": "solve 8x + 31 = 2"}, + ], + response_format=MathResponse, + ) + + message = completion.choices[0].message + if message.parsed: + print(message.parsed.steps) + print("answer: ", message.parsed.final_answer) + ``` + """ + _validate_input_tools(tools) + + extra_headers = { + "X-Stainless-Helper-Method": "beta.chat.completions.parse", + **(extra_headers or {}), + } + + def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]: + return _parse_chat_completion( + response_format=response_format, + chat_completion=raw_completion, + input_tools=tools, + ) + + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": _type_to_response_format(response_format), + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": False, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `ChatCompletion` instance into a `ParsedChatCompletion` + # in the `parser` function above + cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion), + stream=False, + ) + + def stream( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | 
NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionStreamManager[ResponseFormatT]: + """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API + and automatic accumulation of each delta. + + This also supports all of the parsing utilities that `.parse()` does. + + Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response: + + ```py + with client.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[...], + ) as stream: + for event in stream: + if event.type == "content.delta": + print(event.delta, flush=True, end="") + ``` + + When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)` is an iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events). + + When the context manager exits, the response will be closed, however the `stream` instance is still available outside + the context manager. 
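A brief sketch of the pattern described above, reading the accumulated completion after the context manager exits; this assumes the stream helper exposes a `get_final_completion()` accessor (part of the SDK's streaming helpers) and that an API key is set in the environment:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.chat.completions.stream(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Write a one-line greeting."}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            print(event.delta, flush=True, end="")

# The stream instance remains usable after the context manager closes the response.
completion = stream.get_final_completion()
print(completion.choices[0].message.content)
```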
+ """ + extra_headers = { + "X-Stainless-Helper-Method": "beta.chat.completions.stream", + **(extra_headers or {}), + } + + api_request: partial[Stream[ChatCompletionChunk]] = partial( + self._client.chat.completions.create, + messages=messages, + model=model, + audio=audio, + stream=True, + response_format=_type_to_response_format(response_format), + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + metadata=metadata, + modalities=modalities, + n=n, + parallel_tool_calls=parallel_tool_calls, + prediction=prediction, + presence_penalty=presence_penalty, + reasoning_effort=reasoning_effort, + seed=seed, + service_tier=service_tier, + store=store, + stop=stop, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + web_search_options=web_search_options, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return ChatCompletionStreamManager( + api_request, + response_format=response_format, + input_tools=tools, + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + async def parse( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedChatCompletion[ResponseFormatT]: + """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types + & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class. + + You can pass a pydantic model to this method and it will automatically convert the model + into a JSON schema, send it to the API and parse the response content back into the given model. 
+ + This method will also automatically parse `function` tool calls if: + - You use the `openai.pydantic_function_tool()` helper method + - You mark your tool schema with `"strict": True` + + Example usage: + ```py + from pydantic import BaseModel + from openai import AsyncOpenAI + + + class Step(BaseModel): + explanation: str + output: str + + + class MathResponse(BaseModel): + steps: List[Step] + final_answer: str + + + client = AsyncOpenAI() + completion = await client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "You are a helpful math tutor."}, + {"role": "user", "content": "solve 8x + 31 = 2"}, + ], + response_format=MathResponse, + ) + + message = completion.choices[0].message + if message.parsed: + print(message.parsed.steps) + print("answer: ", message.parsed.final_answer) + ``` + """ + _validate_input_tools(tools) + + extra_headers = { + "X-Stainless-Helper-Method": "beta.chat.completions.parse", + **(extra_headers or {}), + } + + def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]: + return _parse_chat_completion( + response_format=response_format, + chat_completion=raw_completion, + input_tools=tools, + ) + + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": _type_to_response_format(response_format), + "seed": seed, + "service_tier": service_tier, + "store": store, + "stop": stop, + "stream": False, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `ChatCompletion` instance into a `ParsedChatCompletion` + # in the `parser` function above + cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion), + stream=False, + ) + + def stream( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + 
parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncChatCompletionStreamManager[ResponseFormatT]: + """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API + and automatic accumulation of each delta. + + This also supports all of the parsing utilities that `.parse()` does. + + Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response: + + ```py + async with client.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[...], + ) as stream: + async for event in stream: + if event.type == "content.delta": + print(event.delta, flush=True, end="") + ``` + + When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)` is an async iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events). + + When the context manager exits, the response will be closed, however the `stream` instance is still available outside + the context manager. 
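For the async variant, a hedged sketch along the same lines; the `content.done` event and its `content` field are taken from the linked helpers.md, not from this hunk:

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async with client.beta.chat.completions.stream(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": "Write a haiku about the sea"}],
    ) as stream:
        async for event in stream:
            if event.type == "content.done":
                print(event.content)


asyncio.run(main())
```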
+ """ + _validate_input_tools(tools) + + extra_headers = { + "X-Stainless-Helper-Method": "beta.chat.completions.stream", + **(extra_headers or {}), + } + + api_request = self._client.chat.completions.create( + messages=messages, + model=model, + audio=audio, + stream=True, + response_format=_type_to_response_format(response_format), + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + metadata=metadata, + modalities=modalities, + n=n, + parallel_tool_calls=parallel_tool_calls, + prediction=prediction, + presence_penalty=presence_penalty, + reasoning_effort=reasoning_effort, + seed=seed, + service_tier=service_tier, + stop=stop, + store=store, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + web_search_options=web_search_options, + ) + return AsyncChatCompletionStreamManager( + api_request, + response_format=response_format, + input_tools=tools, + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.parse = _legacy_response.to_raw_response_wrapper( + completions.parse, + ) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.parse = _legacy_response.async_to_raw_response_wrapper( + completions.parse, + ) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.parse = to_streamed_response_wrapper( + completions.parse, + ) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.parse = async_to_streamed_response_wrapper( + completions.parse, + ) diff --git a/src/openai/resources/beta/realtime/__init__.py b/src/openai/resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..7ab3d9931c --- /dev/null +++ b/src/openai/resources/beta/realtime/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from .transcription_sessions import ( + TranscriptionSessions, + AsyncTranscriptionSessions, + TranscriptionSessionsWithRawResponse, + AsyncTranscriptionSessionsWithRawResponse, + TranscriptionSessionsWithStreamingResponse, + AsyncTranscriptionSessionsWithStreamingResponse, +) + +__all__ = [ + "Sessions", + "AsyncSessions", + "SessionsWithRawResponse", + "AsyncSessionsWithRawResponse", + "SessionsWithStreamingResponse", + "AsyncSessionsWithStreamingResponse", + "TranscriptionSessions", + "AsyncTranscriptionSessions", + "TranscriptionSessionsWithRawResponse", + "AsyncTranscriptionSessionsWithRawResponse", + "TranscriptionSessionsWithStreamingResponse", + "AsyncTranscriptionSessionsWithStreamingResponse", + "Realtime", + "AsyncRealtime", + "RealtimeWithRawResponse", + "AsyncRealtimeWithRawResponse", + "RealtimeWithStreamingResponse", + "AsyncRealtimeWithStreamingResponse", +] diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py new file mode 100644 index 0000000000..d39db48e05 --- /dev/null +++ b/src/openai/resources/beta/realtime/realtime.py @@ -0,0 +1,1092 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import json +import logging +from types import TracebackType +from typing import TYPE_CHECKING, Any, Iterator, cast +from typing_extensions import AsyncIterator + +import httpx +from pydantic import BaseModel + +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Query, Headers, NotGiven +from ...._utils import ( + is_azure_client, + maybe_transform, + strip_not_given, + async_maybe_transform, + is_async_azure_client, +) +from ...._compat import cached_property +from ...._models import construct_type_unchecked +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._exceptions import OpenAIError +from ...._base_client import _merge_mappings +from ....types.beta.realtime import ( + session_update_event_param, + response_create_event_param, + transcription_session_update_param, +) +from .transcription_sessions import ( + TranscriptionSessions, + AsyncTranscriptionSessions, + TranscriptionSessionsWithRawResponse, + AsyncTranscriptionSessionsWithRawResponse, + TranscriptionSessionsWithStreamingResponse, + AsyncTranscriptionSessionsWithStreamingResponse, +) +from ....types.websocket_connection_options import WebsocketConnectionOptions +from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent +from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent +from ....types.beta.realtime.conversation_item_param import ConversationItemParam +from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam + +if TYPE_CHECKING: + from websockets.sync.client import ClientConnection as WebsocketConnection + from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection + + from ...._client import OpenAI, AsyncOpenAI + +__all__ = 
["Realtime", "AsyncRealtime"] + +log: logging.Logger = logging.getLogger(__name__) + + +class Realtime(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessions: + return TranscriptionSessions(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> RealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return RealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessions: + return AsyncTranscriptionSessions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> AsyncRealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. 
+ + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return AsyncRealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse: + return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse: + return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse: + return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._realtime.sessions) + + @cached_property + def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse: + return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions) + + +class AsyncRealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: AsyncRealtimeSessionResource + response: AsyncRealtimeResponseResource + input_audio_buffer: AsyncRealtimeInputAudioBufferResource + conversation: AsyncRealtimeConversationResource + output_audio_buffer: AsyncRealtimeOutputAudioBufferResource + transcription_session: AsyncRealtimeTranscriptionSessionResource + + _connection: AsyncWebsocketConnection + + def __init__(self, connection: AsyncWebsocketConnection) -> None: + self._connection = connection + + self.session = AsyncRealtimeSessionResource(self) + self.response = AsyncRealtimeResponseResource(self) + self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) + self.conversation = AsyncRealtimeConversationResource(self) + self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self) + self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self) + + async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: + """ + An 
infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield await self.recv() + except ConnectionClosedOK: + return + + async def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(await self.recv_bytes()) + + async def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = await self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + return message + + async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam)) + ) + await self._connection.send(data) + + async def close(self, *, code: int = 1000, reason: str = "") -> None: + await self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class AsyncRealtimeConnectionManager: + """ + Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + + def __init__( + self, + *, + client: AsyncOpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: AsyncRealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + async def __aenter__(self) -> AsyncRealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... 
+
+        await connection.close()
+        ```
+        """
+        try:
+            from websockets.asyncio.client import connect
+        except ImportError as exc:
+            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+        extra_query = self.__extra_query
+        auth_headers = self.__client.auth_headers
+        if is_async_azure_client(self.__client):
+            url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
+        else:
+            url = self._prepare_url().copy_with(
+                params={
+                    **self.__client.base_url.params,
+                    "model": self.__model,
+                    **extra_query,
+                },
+            )
+        log.debug("Connecting to %s", url)
+        if self.__websocket_connection_options:
+            log.debug("Connection options: %s", self.__websocket_connection_options)
+
+        self.__connection = AsyncRealtimeConnection(
+            await connect(
+                str(url),
+                user_agent_header=self.__client.user_agent,
+                additional_headers=_merge_mappings(
+                    {
+                        **auth_headers,
+                        "OpenAI-Beta": "realtime=v1",
+                    },
+                    self.__extra_headers,
+                ),
+                **self.__websocket_connection_options,
+            )
+        )
+
+        return self.__connection
+
+    enter = __aenter__
+
+    def _prepare_url(http://github.com/self) -> httpx.URL:
+        if self.__client.websocket_base_url is not None:
+            base_url = httpx.URL(self.__client.websocket_base_url)
+        else:
+            base_url = self.__client._base_url.copy_with(scheme="wss")
+
+        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+        return base_url.copy_with(raw_path=merge_raw_path)
+
+    async def __aexit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        if self.__connection is not None:
+            await self.__connection.close()
+
+
+class RealtimeConnection:
+    """Represents a live websocket connection to the Realtime API"""
+
+    session: RealtimeSessionResource
+    response: RealtimeResponseResource
+    input_audio_buffer: RealtimeInputAudioBufferResource
+    conversation: RealtimeConversationResource
+    output_audio_buffer: RealtimeOutputAudioBufferResource
+    transcription_session: RealtimeTranscriptionSessionResource
+
+    _connection: WebsocketConnection
+
+    def __init__(self, connection: WebsocketConnection) -> None:
+        self._connection = connection
+
+        self.session = RealtimeSessionResource(self)
+        self.response = RealtimeResponseResource(self)
+        self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
+        self.conversation = RealtimeConversationResource(self)
+        self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
+        self.transcription_session = RealtimeTranscriptionSessionResource(self)
+
+    def __iter__(self) -> Iterator[RealtimeServerEvent]:
+        """
+        An infinite-iterator that will continue to yield events until
+        the connection is closed.
+        """
+        from websockets.exceptions import ConnectionClosedOK
+
+        try:
+            while True:
+                yield self.recv()
+        except ConnectionClosedOK:
+            return
+
+    def recv(self) -> RealtimeServerEvent:
+        """
+        Receive the next message from the connection and parses it into a `RealtimeServerEvent` object.
+
+        Canceling this method is safe. There's no risk of losing data.
+        """
+        return self.parse_event(self.recv_bytes())
+
+    def recv_bytes(self) -> bytes:
+        """Receive the next message from the connection as raw bytes.
+
+        Canceling this method is safe. There's no risk of losing data.
+
+        If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
+        then you can call `.parse_event(data)`.
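A hedged sketch of the lower-level path described here, using `recv_bytes()` plus `parse_event()` instead of `recv()`; stopping on `response.done` is an arbitrary choice for the example:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    connection.response.create()
    while True:
        data = connection.recv_bytes()  # raw JSON frame, handy for logging
        event = connection.parse_event(data)  # the same parsing that .recv() performs
        if event.type == "response.done":
            break
```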
+ """ + message = self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + return message + + def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(maybe_transform(event, RealtimeClientEventParam)) + ) + self._connection.send(data) + + def close(self, *, code: int = 1000, reason: str = "") -> None: + self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class RealtimeConnectionManager: + """ + Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + + def __init__( + self, + *, + client: OpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: RealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + def __enter__(self) -> RealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... 
+
+        connection.close()
+        ```
+        """
+        try:
+            from websockets.sync.client import connect
+        except ImportError as exc:
+            raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
+
+        extra_query = self.__extra_query
+        auth_headers = self.__client.auth_headers
+        if is_azure_client(self.__client):
+            url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
+        else:
+            url = self._prepare_url().copy_with(
+                params={
+                    **self.__client.base_url.params,
+                    "model": self.__model,
+                    **extra_query,
+                },
+            )
+        log.debug("Connecting to %s", url)
+        if self.__websocket_connection_options:
+            log.debug("Connection options: %s", self.__websocket_connection_options)
+
+        self.__connection = RealtimeConnection(
+            connect(
+                str(url),
+                user_agent_header=self.__client.user_agent,
+                additional_headers=_merge_mappings(
+                    {
+                        **auth_headers,
+                        "OpenAI-Beta": "realtime=v1",
+                    },
+                    self.__extra_headers,
+                ),
+                **self.__websocket_connection_options,
+            )
+        )
+
+        return self.__connection
+
+    enter = __enter__
+
+    def _prepare_url(http://github.com/self) -> httpx.URL:
+        if self.__client.websocket_base_url is not None:
+            base_url = httpx.URL(self.__client.websocket_base_url)
+        else:
+            base_url = self.__client._base_url.copy_with(scheme="wss")
+
+        merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
+        return base_url.copy_with(raw_path=merge_raw_path)
+
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        if self.__connection is not None:
+            self.__connection.close()
+
+
+class BaseRealtimeConnectionResource:
+    def __init__(self, connection: RealtimeConnection) -> None:
+        self._connection = connection
+
+
+class RealtimeSessionResource(BaseRealtimeConnectionResource):
+    def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
+        """
+        Send this event to update the session’s default configuration.
+        The client may send this event at any time to update any field,
+        except for `voice`. However, note that once a session has been
+        initialized with a particular `model`, it can’t be changed to
+        another model using `session.update`.
+
+        When the server receives a `session.update`, it will respond
+        with a `session.updated` event showing the full, effective configuration.
+        Only the fields that are present are updated. To clear a field like
+        `instructions`, pass an empty string.
+        """
+        self._connection.send(
+            cast(
+                RealtimeClientEventParam,
+                strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
+            )
+        )
+
+
+class RealtimeResponseResource(BaseRealtimeConnectionResource):
+    def create(
+        self,
+        *,
+        event_id: str | NotGiven = NOT_GIVEN,
+        response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        This event instructs the server to create a Response, which means triggering
+        model inference. When in Server VAD mode, the server will create Responses
+        automatically.
+
+        A Response will include at least one Item, and may have two, in which case
+        the second will be a function call. These Items will be appended to the
+        conversation history.
+
+        The server will respond with a `response.created` event, events for Items
+        and content created, and finally a `response.done` event to indicate the
+        Response is complete.
+
+        The `response.create` event includes inference configuration like
+        `instructions`, and `temperature`.
These fields will override the Session's + configuration for this Response only. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class RealtimeConversationResource(BaseRealtimeConnectionResource): + @cached_property + def item(self) -> RealtimeConversationItemResource: + return RealtimeConversationItemResource(self._connection) + + +class RealtimeConversationItemResource(BaseRealtimeConnectionResource): + def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. 
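Putting the buffer lifecycle described above together (append, then commit, then ask for a response), a hedged sketch; `audio_chunks` is a hypothetical iterable of raw PCM16 bytes, and disabling server VAD via `turn_detection: None` follows the public API reference rather than this hunk:

```py
import base64

from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # with server VAD turned off, the buffer must be committed manually
    connection.session.update(session={"turn_detection": None})

    for chunk in audio_chunks:  # hypothetical iterable of raw PCM16 bytes
        connection.input_audio_buffer.append(audio=base64.b64encode(chunk).decode())

    connection.input_audio_buffer.commit()
    connection.response.create()
```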
+ """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """**WebRTC Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). 
+ """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id})) + ) + + +class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource): + def update( + self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to update a transcription session.""" + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), + ) + ) + + +class BaseAsyncRealtimeConnectionResource: + def __init__(self, connection: AsyncRealtimeConnection) -> None: + self._connection = connection + + +class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """ + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present are updated. To clear a field like + `instructions`, pass an empty string. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource): + async def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. 
+ + The server will + respond with an `input_audio_buffer.cleared` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource): + @cached_property + def item(self) -> AsyncRealtimeConversationItemResource: + return AsyncRealtimeConversationItemResource(self._connection) + + +class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource): + async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + async def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. 
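For the async resources, a hedged sketch of seeding conversation history with `conversation.item.create()` before requesting a response; the item payload shape follows the public Realtime API reference:

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
        await connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Remember that my name is Ada."}],
            }
        )
        await connection.response.create()

        async for event in connection:
            if event.type == "response.done":
                break


asyncio.run(main())
```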
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + async def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """**WebRTC Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc). + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id})) + ) + + +class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to update a transcription session.""" + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), + ) + ) diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py new file mode 100644 index 0000000000..3c0d4d47c1 --- /dev/null +++ b/src/openai/resources/beta/realtime/sessions.py @@ -0,0 +1,388 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import session_create_params +from ....types.beta.realtime.session_create_response import SessionCreateResponse + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. 
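A hedged sketch of minting such an ephemeral token server-side; the exact shape of `client_secret` on `SessionCreateResponse` follows the public API reference rather than this hunk:

```py
from openai import OpenAI

client = OpenAI()  # uses your standard, server-side API key

session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview",
    modalities=["audio", "text"],
    voice="verse",
)

# hand the short-lived client secret to the browser instead of your real API key
print(session.client_secret)
```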
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+              `null` to turn off once on. Input audio transcription is not native to the
+              model, since the model consumes audio directly. Transcription runs
+              asynchronously through
+              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as guidance of input audio content rather than precisely
+              what the model heard. The client can optionally set the language and prompt for
+              transcription, these offer additional guidance to the transcription service.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format, (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability.
For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, + `shimmer`, and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/sessions", + body=maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
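A brief sketch of how these two accessors are typically used, following the raw/streaming response pattern linked in the docstrings; the session parameters and header name are illustrative:

from openai import OpenAI

client = OpenAI()

# `.with_raw_response` returns the HTTP response wrapper; `.parse()` yields the usual model.
raw = client.beta.realtime.sessions.with_raw_response.create(model="gpt-4o-realtime-preview")
print(raw.headers.get("x-request-id"))
session = raw.parse()

# `.with_streaming_response` defers reading the body until it is explicitly consumed.
with client.beta.realtime.sessions.with_streaming_response.create(
    model="gpt-4o-realtime-preview"
) as response:
    session = response.parse()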
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. + + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + + instructions: The default system instructions (i.e. system message) prepended to model calls. 
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
+              temperature of 0.8 is highly recommended for best performance.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
+              `shimmer`, and `verse`.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/sessions", + body=await async_maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) diff --git a/src/openai/resources/beta/realtime/transcription_sessions.py b/src/openai/resources/beta/realtime/transcription_sessions.py new file mode 100644 index 0000000000..dbcb1bb33b --- /dev/null +++ b/src/openai/resources/beta/realtime/transcription_sessions.py @@ -0,0 +1,274 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import transcription_session_create_params +from ....types.beta.realtime.transcription_session import TranscriptionSession + +__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"] + + +class TranscriptionSessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> TranscriptionSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return TranscriptionSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return TranscriptionSessionsWithStreamingResponse(self) + + def create( + self, + *, + include: List[str] | NotGiven = NOT_GIVEN, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction + | NotGiven = NOT_GIVEN, + input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionSession: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API specifically for realtime transcriptions. Can be configured with + the same session parameters as the `transcription_session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + include: + The set of items to include in the transcription. Current available items are: + + - `item.input_audio_transcription.logprobs` + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. + + input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn + off. Noise reduction filters audio added to the input audio buffer before it is + sent to VAD and the model. Filtering the audio can improve VAD and turn + detection accuracy (reducing false positives) and model performance by improving + perception of the input audio. + + input_audio_transcription: Configuration for input audio transcription. The client can optionally set the + language and prompt for transcription, these offer additional guidance to the + transcription service. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + turn_detection: Configuration for turn detection, ether Server VAD or Semantic VAD. This can be + set to `null` to turn off, in which case the client must manually trigger model + response. Server VAD means that the model will detect the start and end of + speech based on audio volume and respond at the end of user speech. 
Semantic VAD + is more advanced and uses a turn detection model (in conjuction with VAD) to + semantically estimate whether the user has finished speaking, then dynamically + sets a timeout based on this probability. For example, if user audio trails off + with "uhhm", the model will score a low probability of turn end and wait longer + for the user to continue speaking. This can be useful for more natural + conversations, but may have a higher latency. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/transcription_sessions", + body=maybe_transform( + { + "include": include, + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "modalities": modalities, + "turn_detection": turn_detection, + }, + transcription_session_create_params.TranscriptionSessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TranscriptionSession, + ) + + +class AsyncTranscriptionSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncTranscriptionSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncTranscriptionSessionsWithStreamingResponse(self) + + async def create( + self, + *, + include: List[str] | NotGiven = NOT_GIVEN, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction + | NotGiven = NOT_GIVEN, + input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> TranscriptionSession: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API specifically for realtime transcriptions. Can be configured with + the same session parameters as the `transcription_session.update` client event. 
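A sketch of the async transcription variant in use, assuming the accessor path `client.beta.realtime.transcription_sessions` implied by the module location; the configuration values are examples only:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # Ephemeral token scoped to realtime transcription; parameters mirror the
    # `transcription_session.update` client event (values here are illustrative).
    session = await client.beta.realtime.transcription_sessions.create(
        input_audio_format="pcm16",
        input_audio_transcription={"model": "gpt-4o-transcribe", "language": "en"},
        include=["item.input_audio_transcription.logprobs"],
    )
    print(session.client_secret)

asyncio.run(main())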
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          include:
+              The set of items to include in the transcription. Current available items are:
+
+              - `item.input_audio_transcription.logprobs`
+
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
+              off. Noise reduction filters audio added to the input audio buffer before it is
+              sent to VAD and the model. Filtering the audio can improve VAD and turn
+              detection accuracy (reducing false positives) and model performance by improving
+              perception of the input audio.
+
+          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
+              language and prompt for transcription; these offer additional guidance to the
+              transcription service.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+              set to `null` to turn off, in which case the client must manually trigger model
+              response. Server VAD means that the model will detect the start and end of
+              speech based on audio volume and respond at the end of user speech. Semantic VAD
+              is more advanced and uses a turn detection model (in conjunction with VAD) to
+              semantically estimate whether the user has finished speaking, then dynamically
+              sets a timeout based on this probability. For example, if user audio trails off
+              with "uhhm", the model will score a low probability of turn end and wait longer
+              for the user to continue speaking. This can be useful for more natural
+              conversations, but may have a higher latency.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/transcription_sessions", + body=await async_maybe_transform( + { + "include": include, + "input_audio_format": input_audio_format, + "input_audio_noise_reduction": input_audio_noise_reduction, + "input_audio_transcription": input_audio_transcription, + "modalities": modalities, + "turn_detection": turn_detection, + }, + transcription_session_create_params.TranscriptionSessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=TranscriptionSession, + ) + + +class TranscriptionSessionsWithRawResponse: + def __init__(self, transcription_sessions: TranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = _legacy_response.to_raw_response_wrapper( + transcription_sessions.create, + ) + + +class AsyncTranscriptionSessionsWithRawResponse: + def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + transcription_sessions.create, + ) + + +class TranscriptionSessionsWithStreamingResponse: + def __init__(self, transcription_sessions: TranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = to_streamed_response_wrapper( + transcription_sessions.create, + ) + + +class AsyncTranscriptionSessionsWithStreamingResponse: + def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None: + self._transcription_sessions = transcription_sessions + + self.create = async_to_streamed_response_wrapper( + transcription_sessions.create, + ) diff --git a/src/openai/resources/beta/threads/__init__.py b/src/openai/resources/beta/threads/__init__.py index b9aaada465..a66e445b1f 100644 --- a/src/openai/resources/beta/threads/__init__.py +++ b/src/openai/resources/beta/threads/__init__.py @@ -1,17 +1,28 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
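For illustration, the streaming-response wrappers that this `__init__` re-exports (see the hunk below) can be imported directly alongside the existing names; a tiny check, assuming the package is installed:

# The new *WithStreamingResponse classes sit next to the raw-response ones.
from openai.resources.beta.threads import (
    Messages,
    MessagesWithRawResponse,
    MessagesWithStreamingResponse,
)

print(Messages, MessagesWithRawResponse, MessagesWithStreamingResponse)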
-from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) from .threads import ( Threads, AsyncThreads, ThreadsWithRawResponse, AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, ) from .messages import ( Messages, AsyncMessages, MessagesWithRawResponse, AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, ) __all__ = [ @@ -19,12 +30,18 @@ "AsyncRuns", "RunsWithRawResponse", "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", "Messages", "AsyncMessages", "MessagesWithRawResponse", "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", "Threads", "AsyncThreads", "ThreadsWithRawResponse", "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", ] diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/threads/messages.py similarity index 50% rename from src/openai/resources/beta/assistants/assistants.py rename to src/openai/resources/beta/threads/messages.py index efa711ecf4..3a8913ef16 100644 --- a/src/openai/resources/beta/assistants/assistants.py +++ b/src/openai/resources/beta/threads/messages.py @@ -1,87 +1,89 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, List, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal import httpx -from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse +from .... import _legacy_response from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ....types.beta import ( - Assistant, - AssistantDeleted, - assistant_list_params, - assistant_create_params, - assistant_update_params, +from ...._base_client import ( + AsyncPaginator, + make_request_options, ) -from ...._base_client import AsyncPaginator, make_request_options +from ....types.beta.threads import message_list_params, message_create_params, message_update_params +from ....types.beta.threads.message import Message +from ....types.shared_params.metadata import Metadata +from ....types.beta.threads.message_deleted import MessageDeleted +from ....types.beta.threads.message_content_part_param import MessageContentPartParam -if TYPE_CHECKING: - from ...._client import OpenAI, AsyncOpenAI +__all__ = ["Messages", "AsyncMessages"] -__all__ = ["Assistants", "AsyncAssistants"] +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return MessagesWithRawResponse(self) -class Assistants(SyncAPIResource): - files: Files - with_raw_response: AssistantsWithRawResponse + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.files = Files(client) - self.with_raw_response = AssistantsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return MessagesWithStreamingResponse(self) def create( self, + thread_id: str, *, - model: str, - description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_create_params.Tool] | NotGiven = NOT_GIVEN, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: + ) -> Message: """ - Create an assistant with a model and instructions. + Create a message. Args: - model: ID of the model to use. You can use the - [List models](https://platform.openai.com/docs/api-reference/models/list) API to - see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + content: The text contents of the message. - description: The description of the assistant. The maximum length is 512 characters. + role: + The role of the entity that is creating the message. Allowed values include: - file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. - instructions: The system instructions that the assistant uses. The maximum length is 32768 - characters. + attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - name: The name of the assistant. The maximum length is 256 characters. 
+ for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -91,40 +93,40 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( - "/assistants", + f"/threads/{thread_id}/messages", body=maybe_transform( { - "model": model, - "description": description, - "file_ids": file_ids, - "instructions": instructions, + "content": content, + "role": role, + "attachments": attachments, "metadata": metadata, - "name": name, - "tools": tools, }, - assistant_create_params.AssistantCreateParams, + message_create_params.MessageCreateParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) def retrieve( self, - assistant_id: str, + message_id: str, *, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: + ) -> Message: """ - Retrieves an assistant. + Retrieve a message. Args: extra_headers: Send extra headers @@ -135,64 +137,42 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( - f"/assistants/{assistant_id}", + f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) def update( self, - assistant_id: str, + message_id: str, *, - description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_update_params.Tool] | NotGiven = NOT_GIVEN, + thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: - """Modifies an assistant. + ) -> Message: + """ + Modifies a message. Args: - description: The description of the assistant. - - The maximum length is 512 characters. - - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. If a - file was previosuly attached to the list but does not show up in the list, it - will be deleted from the assistant. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 - characters. - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - model: ID of the model to use. You can use the - [List models](https://platform.openai.com/docs/api-reference/models/list) API to - see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - name: The name of the assistant. The maximum length is 256 characters. - - tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -202,55 +182,49 @@ def update( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( - f"/assistants/{assistant_id}", - body=maybe_transform( - { - "description": description, - "file_ids": file_ids, - "instructions": instructions, - "metadata": metadata, - "model": model, - "name": name, - "tools": tools, - }, - assistant_update_params.AssistantUpdateParams, - ), + f"/threads/{thread_id}/messages/{message_id}", + body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) def list( self, + thread_id: str, *, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[Assistant]: - """Returns a list of assistants. + ) -> SyncCursorPage[Message]: + """ + Returns a list of messages for a given thread. Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place + after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list. before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -258,6 +232,8 @@ def list( order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. + run_id: Filter messages by the run ID that generated them. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -266,10 +242,12 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( - "/assistants", - page=SyncCursorPage[Assistant], + f"/threads/{thread_id}/messages", + page=SyncCursorPage[Message], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -281,26 +259,28 @@ def list( "before": before, "limit": limit, "order": order, + "run_id": run_id, }, - assistant_list_params.AssistantListParams, + message_list_params.MessageListParams, ), ), - model=Assistant, + model=Message, ) def delete( self, - assistant_id: str, + message_id: str, *, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantDeleted: + ) -> MessageDeleted: """ - Delete an assistant. + Deletes a message. 
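Taken together, the message methods described above can be exercised roughly as follows; the thread ID, content, and metadata are placeholders:

from openai import OpenAI

client = OpenAI()
thread_id = "thread_abc123"  # placeholder for an existing thread

# Create a user message; metadata keys/values follow the 64/512 character limits above.
message = client.beta.threads.messages.create(
    thread_id,
    role="user",
    content="What is on my schedule today?",
    metadata={"source": "example"},
)

# List messages on the thread; the cursor parameters (`after`, `before`, `limit`, `order`)
# and the new `run_id` filter behave as documented above. Iteration auto-paginates.
for msg in client.beta.threads.messages.list(thread_id, limit=20, order="desc"):
    print(msg.id, msg.role)

# Deleting requires both identifiers, matching the signature above.
client.beta.threads.messages.delete(message.id, thread_id=thread_id)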
Args: extra_headers: Send extra headers @@ -311,70 +291,77 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._delete( - f"/assistants/{assistant_id}", + f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantDeleted, + cast_to=MessageDeleted, ) -class AsyncAssistants(AsyncAPIResource): - files: AsyncFiles - with_raw_response: AsyncAssistantsWithRawResponse +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.files = AsyncFiles(client) - self.with_raw_response = AsyncAssistantsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncMessagesWithStreamingResponse(self) async def create( self, + thread_id: str, *, - model: str, - description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_create_params.Tool] | NotGiven = NOT_GIVEN, + content: Union[str, Iterable[MessageContentPartParam]], + role: Literal["user", "assistant"], + attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: + ) -> Message: """ - Create an assistant with a model and instructions. + Create a message. Args: - model: ID of the model to use. You can use the - [List models](https://platform.openai.com/docs/api-reference/models/list) API to - see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + content: The text contents of the message. - description: The description of the assistant. The maximum length is 512 characters. + role: + The role of the entity that is creating the message. 
Allowed values include: - file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. - instructions: The system instructions that the assistant uses. The maximum length is 32768 - characters. + attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - name: The name of the assistant. The maximum length is 256 characters. - - tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -384,40 +371,40 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( - "/assistants", - body=maybe_transform( + f"/threads/{thread_id}/messages", + body=await async_maybe_transform( { - "model": model, - "description": description, - "file_ids": file_ids, - "instructions": instructions, + "content": content, + "role": role, + "attachments": attachments, "metadata": metadata, - "name": name, - "tools": tools, }, - assistant_create_params.AssistantCreateParams, + message_create_params.MessageCreateParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) async def retrieve( self, - assistant_id: str, + message_id: str, *, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: + ) -> Message: """ - Retrieves an assistant. + Retrieve a message. 
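The async resource mirrors this shape; a short sketch with placeholder IDs:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # `message_id` is positional and `thread_id` is keyword-only, as in the signatures above.
    message = await client.beta.threads.messages.retrieve(
        "msg_abc123", thread_id="thread_abc123"
    )
    updated = await client.beta.threads.messages.update(
        message.id, thread_id="thread_abc123", metadata={"reviewed": "true"}
    )
    print(updated.metadata)

asyncio.run(main())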
Args: extra_headers: Send extra headers @@ -428,64 +415,42 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( - f"/assistants/{assistant_id}", + f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) async def update( self, - assistant_id: str, + message_id: str, *, - description: Optional[str] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - tools: List[assistant_update_params.Tool] | NotGiven = NOT_GIVEN, + thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Assistant: - """Modifies an assistant. + ) -> Message: + """ + Modifies a message. Args: - description: The description of the assistant. - - The maximum length is 512 characters. - - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. If a - file was previosuly attached to the list but does not show up in the list, it - will be deleted from the assistant. - - instructions: The system instructions that the assistant uses. The maximum length is 32768 - characters. - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - model: ID of the model to use. You can use the - [List models](https://platform.openai.com/docs/api-reference/models/list) API to - see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. - - name: The name of the assistant. The maximum length is 256 characters. - - tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per - assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
extra_headers: Send extra headers @@ -495,55 +460,49 @@ async def update( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( - f"/assistants/{assistant_id}", - body=maybe_transform( - { - "description": description, - "file_ids": file_ids, - "instructions": instructions, - "metadata": metadata, - "model": model, - "name": name, - "tools": tools, - }, - assistant_update_params.AssistantUpdateParams, - ), + f"/threads/{thread_id}/messages/{message_id}", + body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Assistant, + cast_to=Message, ) def list( self, + thread_id: str, *, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + run_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]: - """Returns a list of assistants. + ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]: + """ + Returns a list of messages for a given thread. Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place + after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list. before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -551,6 +510,8 @@ def list( order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. + run_id: Filter messages by the run ID that generated them. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -559,10 +520,12 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( - "/assistants", - page=AsyncCursorPage[Assistant], + f"/threads/{thread_id}/messages", + page=AsyncCursorPage[Message], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -574,26 +537,28 @@ def list( "before": before, "limit": limit, "order": order, + "run_id": run_id, }, - assistant_list_params.AssistantListParams, + message_list_params.MessageListParams, ), ), - model=Assistant, + model=Message, ) async def delete( self, - assistant_id: str, + message_id: str, *, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantDeleted: + ) -> MessageDeleted: """ - Delete an assistant. + Deletes a message. Args: extra_headers: Send extra headers @@ -604,53 +569,99 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not message_id: + raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._delete( - f"/assistants/{assistant_id}", + f"/threads/{thread_id}/messages/{message_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=AssistantDeleted, + cast_to=MessageDeleted, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = _legacy_response.to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + messages.delete, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = _legacy_response.async_to_raw_response_wrapper( + messages.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + messages.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + messages.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + messages.delete, ) -class AssistantsWithRawResponse: - def __init__(self, assistants: Assistants) -> None: - self.files = 
FilesWithRawResponse(assistants.files) +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages - self.create = to_raw_response_wrapper( - assistants.create, + self.create = to_streamed_response_wrapper( + messages.create, ) - self.retrieve = to_raw_response_wrapper( - assistants.retrieve, + self.retrieve = to_streamed_response_wrapper( + messages.retrieve, ) - self.update = to_raw_response_wrapper( - assistants.update, + self.update = to_streamed_response_wrapper( + messages.update, ) - self.list = to_raw_response_wrapper( - assistants.list, + self.list = to_streamed_response_wrapper( + messages.list, ) - self.delete = to_raw_response_wrapper( - assistants.delete, + self.delete = to_streamed_response_wrapper( + messages.delete, ) -class AsyncAssistantsWithRawResponse: - def __init__(self, assistants: AsyncAssistants) -> None: - self.files = AsyncFilesWithRawResponse(assistants.files) +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages - self.create = async_to_raw_response_wrapper( - assistants.create, + self.create = async_to_streamed_response_wrapper( + messages.create, ) - self.retrieve = async_to_raw_response_wrapper( - assistants.retrieve, + self.retrieve = async_to_streamed_response_wrapper( + messages.retrieve, ) - self.update = async_to_raw_response_wrapper( - assistants.update, + self.update = async_to_streamed_response_wrapper( + messages.update, ) - self.list = async_to_raw_response_wrapper( - assistants.list, + self.list = async_to_streamed_response_wrapper( + messages.list, ) - self.delete = async_to_raw_response_wrapper( - assistants.delete, + self.delete = async_to_streamed_response_wrapper( + messages.delete, ) diff --git a/src/openai/resources/beta/threads/messages/__init__.py b/src/openai/resources/beta/threads/messages/__init__.py deleted file mode 100644 index d8d4ce448c..0000000000 --- a/src/openai/resources/beta/threads/messages/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse -from .messages import ( - Messages, - AsyncMessages, - MessagesWithRawResponse, - AsyncMessagesWithRawResponse, -) - -__all__ = [ - "Files", - "AsyncFiles", - "FilesWithRawResponse", - "AsyncFilesWithRawResponse", - "Messages", - "AsyncMessages", - "MessagesWithRawResponse", - "AsyncMessagesWithRawResponse", -] diff --git a/src/openai/resources/beta/threads/messages/files.py b/src/openai/resources/beta/threads/messages/files.py deleted file mode 100644 index e028a6fda7..0000000000 --- a/src/openai/resources/beta/threads/messages/files.py +++ /dev/null @@ -1,259 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -from typing import TYPE_CHECKING -from typing_extensions import Literal - -import httpx - -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform -from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import AsyncPaginator, make_request_options -from .....types.beta.threads.messages import MessageFile, file_list_params - -if TYPE_CHECKING: - from ....._client import OpenAI, AsyncOpenAI - -__all__ = ["Files", "AsyncFiles"] - - -class Files(SyncAPIResource): - with_raw_response: FilesWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = FilesWithRawResponse(self) - - def retrieve( - self, - file_id: str, - *, - thread_id: str, - message_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> MessageFile: - """ - Retrieves a message file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get( - f"/threads/{thread_id}/messages/{message_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=MessageFile, - ) - - def list( - self, - message_id: str, - *, - thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[MessageFile]: - """Returns a list of message files. - - Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. 
`asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages/{message_id}/files", - page=SyncCursorPage[MessageFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=MessageFile, - ) - - -class AsyncFiles(AsyncAPIResource): - with_raw_response: AsyncFilesWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncFilesWithRawResponse(self) - - async def retrieve( - self, - file_id: str, - *, - thread_id: str, - message_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> MessageFile: - """ - Retrieves a message file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/messages/{message_id}/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=MessageFile, - ) - - def list( - self, - message_id: str, - *, - thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[MessageFile, AsyncCursorPage[MessageFile]]: - """Returns a list of message files. - - Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. 
- - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages/{message_id}/files", - page=AsyncCursorPage[MessageFile], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - file_list_params.FileListParams, - ), - ), - model=MessageFile, - ) - - -class FilesWithRawResponse: - def __init__(self, files: Files) -> None: - self.retrieve = to_raw_response_wrapper( - files.retrieve, - ) - self.list = to_raw_response_wrapper( - files.list, - ) - - -class AsyncFilesWithRawResponse: - def __init__(self, files: AsyncFiles) -> None: - self.retrieve = async_to_raw_response_wrapper( - files.retrieve, - ) - self.list = async_to_raw_response_wrapper( - files.list, - ) diff --git a/src/openai/resources/beta/threads/messages/messages.py b/src/openai/resources/beta/threads/messages/messages.py deleted file mode 100644 index 30ae072512..0000000000 --- a/src/openai/resources/beta/threads/messages/messages.py +++ /dev/null @@ -1,479 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Optional -from typing_extensions import Literal - -import httpx - -from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform -from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import AsyncPaginator, make_request_options -from .....types.beta.threads import ( - ThreadMessage, - message_list_params, - message_create_params, - message_update_params, -) - -if TYPE_CHECKING: - from ....._client import OpenAI, AsyncOpenAI - -__all__ = ["Messages", "AsyncMessages"] - - -class Messages(SyncAPIResource): - files: Files - with_raw_response: MessagesWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.files = Files(client) - self.with_raw_response = MessagesWithRawResponse(self) - - def create( - self, - thread_id: str, - *, - content: str, - role: Literal["user"], - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Create a message. 
- - Args: - content: The content of the message. - - role: The role of the entity that is creating the message. Currently only `user` is - supported. - - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._post( - f"/threads/{thread_id}/messages", - body=maybe_transform( - { - "content": content, - "role": role, - "file_ids": file_ids, - "metadata": metadata, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - def retrieve( - self, - message_id: str, - *, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Retrieve a message. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get( - f"/threads/{thread_id}/messages/{message_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - def update( - self, - message_id: str, - *, - thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Modifies a message. - - Args: - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._post( - f"/threads/{thread_id}/messages/{message_id}", - body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - def list( - self, - thread_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncCursorPage[ThreadMessage]: - """ - Returns a list of messages for a given thread. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages", - page=SyncCursorPage[ThreadMessage], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - message_list_params.MessageListParams, - ), - ), - model=ThreadMessage, - ) - - -class AsyncMessages(AsyncAPIResource): - files: AsyncFiles - with_raw_response: AsyncMessagesWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.files = AsyncFiles(client) - self.with_raw_response = AsyncMessagesWithRawResponse(self) - - async def create( - self, - thread_id: str, - *, - content: str, - role: Literal["user"], - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Create a message. - - Args: - content: The content of the message. - - role: The role of the entity that is creating the message. Currently only `user` is - supported. - - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/messages", - body=maybe_transform( - { - "content": content, - "role": role, - "file_ids": file_ids, - "metadata": metadata, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - async def retrieve( - self, - message_id: str, - *, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Retrieve a message. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/messages/{message_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - async def update( - self, - message_id: str, - *, - thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ThreadMessage: - """ - Modifies a message. - - Args: - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/messages/{message_id}", - body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ThreadMessage, - ) - - def list( - self, - thread_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[ThreadMessage, AsyncCursorPage[ThreadMessage]]: - """ - Returns a list of messages for a given thread. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. 
For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/messages", - page=AsyncCursorPage[ThreadMessage], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - message_list_params.MessageListParams, - ), - ), - model=ThreadMessage, - ) - - -class MessagesWithRawResponse: - def __init__(self, messages: Messages) -> None: - self.files = FilesWithRawResponse(messages.files) - - self.create = to_raw_response_wrapper( - messages.create, - ) - self.retrieve = to_raw_response_wrapper( - messages.retrieve, - ) - self.update = to_raw_response_wrapper( - messages.update, - ) - self.list = to_raw_response_wrapper( - messages.list, - ) - - -class AsyncMessagesWithRawResponse: - def __init__(self, messages: AsyncMessages) -> None: - self.files = AsyncFilesWithRawResponse(messages.files) - - self.create = async_to_raw_response_wrapper( - messages.create, - ) - self.retrieve = async_to_raw_response_wrapper( - messages.retrieve, - ) - self.update = async_to_raw_response_wrapper( - messages.update, - ) - self.list = async_to_raw_response_wrapper( - messages.list, - ) diff --git a/src/openai/resources/beta/threads/runs/__init__.py b/src/openai/resources/beta/threads/runs/__init__.py index 6b61813974..50aa9fae60 100644 --- a/src/openai/resources/beta/threads/runs/__init__.py +++ b/src/openai/resources/beta/threads/runs/__init__.py @@ -1,15 +1,33 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse -from .steps import Steps, AsyncSteps, StepsWithRawResponse, AsyncStepsWithRawResponse +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) __all__ = [ "Steps", "AsyncSteps", "StepsWithRawResponse", "AsyncStepsWithRawResponse", + "StepsWithStreamingResponse", + "AsyncStepsWithStreamingResponse", "Runs", "AsyncRuns", "RunsWithRawResponse", "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", ] diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index 969bfab70a..4d19010fea 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -1,51 +1,110 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, List, Optional -from typing_extensions import Literal +import typing_extensions +from typing import List, Union, Iterable, Optional +from functools import partial +from typing_extensions import Literal, overload import httpx -from .steps import Steps, AsyncSteps, StepsWithRawResponse, AsyncStepsWithRawResponse +from ..... import _legacy_response +from .steps import ( + Steps, + AsyncSteps, + StepsWithRawResponse, + AsyncStepsWithRawResponse, + StepsWithStreamingResponse, + AsyncStepsWithStreamingResponse, +) from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform +from ....._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....._streaming import Stream, AsyncStream from .....pagination import SyncCursorPage, AsyncCursorPage from ....._base_client import AsyncPaginator, make_request_options +from .....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) from .....types.beta.threads import ( - Run, run_list_params, run_create_params, run_update_params, run_submit_tool_outputs_params, ) - -if TYPE_CHECKING: - from ....._client import OpenAI, AsyncOpenAI +from .....types.beta.threads.run import Run +from .....types.shared.chat_model import ChatModel +from .....types.shared_params.metadata import Metadata +from .....types.shared.reasoning_effort import ReasoningEffort +from .....types.beta.assistant_tool_param import AssistantToolParam +from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.threads.runs.run_step_include import RunStepInclude +from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["Runs", "AsyncRuns"] class Runs(SyncAPIResource): - 
steps: Steps - with_raw_response: RunsWithRawResponse + @cached_property + def steps(self) -> Steps: + return Steps(self._client) + + @cached_property + def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RunsWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.steps = Steps(client) - self.with_raw_response = RunsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RunsWithStreamingResponse(self) + @overload def create( self, thread_id: str, *, assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[List[run_create_params.Tool]] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -61,22 +120,408 @@ def create( [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run. - instructions: Override the default system message of the assistant. This is useful for + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. 
+ + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. 
Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. 
+ + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -85,23 +530,75 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... 
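# Usage sketch for the `create` overloads above: omitting `stream` (or passing
# `stream=False`) returns a `Run`, while `stream=True` returns a
# `Stream[AssistantStreamEvent]` that yields server-sent events as they arrive.
# Illustrative only -- assumes a configured client; the thread and assistant IDs
# below are placeholders.
from openai import OpenAI

client = OpenAI()

# Non-streaming call: returns the created Run once the API responds.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",  # placeholder thread ID
    assistant_id="asst_abc123",  # placeholder assistant ID
)

# Streaming call: iterate over AssistantStreamEvent objects as they arrive.
stream = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    stream=True,
)
for event in stream:
    print(event.event)  # e.g. "thread.run.created", "thread.message.delta"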
+ + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create( + self, + thread_id: str, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs", body=maybe_transform( { "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, }, - run_create_params.RunCreateParams, + run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming, ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], ) def retrieve( @@ -128,7 +625,11 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = 
{"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}/runs/{run_id}", options=make_request_options( @@ -142,7 +643,7 @@ def update( run_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -155,9 +656,11 @@ def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -167,7 +670,11 @@ def update( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs/{run_id}", body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), @@ -203,8 +710,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
@@ -220,7 +727,9 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs", page=SyncCursorPage[Run], @@ -266,7 +775,11 @@ def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}/runs/{run_id}/cancel", options=make_request_options( @@ -275,12 +788,28 @@ def cancel( cast_to=Run, ) - def submit_tool_outputs( + def create_and_poll( self, - run_id: str, *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + thread_id: str, - tool_outputs: List[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -289,102 +818,172 @@ def submit_tool_outputs( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Run: """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. - - Args: - tool_outputs: A list of tools for which the outputs are being submitted. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds + A helper to create a run and poll for a terminal state.
More information on Run + lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._post( - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, + run = self.create( + thread_id=thread_id, + assistant_id=assistant_id, + include=include, + additional_instructions=additional_instructions, + additional_messages=additional_messages, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + response_format=response_format, + temperature=temperature, + tool_choice=tool_choice, + parallel_tool_calls=parallel_tool_calls, + reasoning_effort=reasoning_effort, + # We assume we are not streaming when polling + stream=False, + tools=tools, + truncation_strategy=truncation_strategy, + top_p=top_p, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + poll_interval_ms=poll_interval_ms, + timeout=timeout, ) - -class AsyncRuns(AsyncAPIResource): - steps: AsyncSteps - with_raw_response: AsyncRunsWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.steps = AsyncSteps(client) - self.with_raw_response = AsyncRunsWithRawResponse(self) - - async def create( + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( self, - thread_id: str, *, assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[List[run_create_params.Tool]] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Create a run. - - Args: - assistant_id: The ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to - execute this run. - - instructions: Override the default system message of the assistant. This is useful for - modifying the behavior on a per-run basis. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... - model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to - be used to execute this run. If a value is provided here, it will override the - model associated with the assistant. If not, the model associated with the - assistant will be used. - - tools: Override the tools the assistant can use for this run. This is useful for - modifying the behavior on a per-run basis. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... 
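The hunks above also add the `create_and_poll` helper and deprecate `create_and_stream` in favour of the `stream` helper defined further below. A hedged sketch of how these helpers are meant to be called (placeholder IDs; the `Handler` subclass is illustrative only):

from openai import OpenAI, AssistantEventHandler

client = OpenAI()

# create_and_poll creates the run, then polls until a terminal state is reached.
run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    poll_interval_ms=500,        # optional; otherwise the server-suggested interval is used
)
print(run.status)

# stream() supersedes the deprecated create_and_stream(); both return a stream manager.
class Handler(AssistantEventHandler):  # illustrative subclass
    def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value or "", end="", flush=True)

with client.beta.threads.runs.stream(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    event_handler=Handler(),
) as stream:
    stream.until_done()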
- timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, f"/threads/{thread_id}/runs", body=maybe_transform( { "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "tools": tools, + "truncation_strategy": truncation_strategy, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "top_p": top_p, }, run_create_params.RunCreateParams, ), @@ -392,115 +991,1635 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - async def retrieve( + def poll( self, run_id: str, - *, thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, ) -> Run: """ - Retrieves a run. + A helper to poll a run status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - Args: - extra_headers: Send extra headers + if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - extra_query: Add additional query parameters to the request + terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} + while True: + response = self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) - extra_body: Add additional JSON properties to the request + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/runs/{run_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 - async def update( + self._sleep(poll_interval_ms / 1000) + + @overload + def stream( self, - run_id: str, *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a Run stream""" + ... + + @overload + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + ... + + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming + if stream + else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. + More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = partial( + self._post, + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], + ) + return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler()) + + +class AsyncRuns(AsyncAPIResource): + @cached_property + def steps(self) -> AsyncSteps: + return AsyncSteps(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRunsWithStreamingResponse(self) + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. 
See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. 
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the initial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: Literal[True], + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant.
This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. 
`auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the initial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + thread_id: str, + *, + assistant_id: str, + stream: bool, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content.
+ + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the initial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create( + self, + thread_id: str, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams), + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def retrieve( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Retrieves a run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + async def update( + self, + run_id: str, + *, + thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Modifies a run. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}", + body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + def list( + self, + thread_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + """ + Returns a list of runs belonging to a thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs", + page=AsyncCursorPage[Run], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + run_list_params.RunListParams, + ), + ), + model=Run, + ) + + async def cancel( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Cancels a run that is `in_progress`. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + async def create_and_poll( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        A helper to create a run and poll for a terminal state. More information on Run
+        lifecycles can be found here:
+        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
+        """
+        run = await self.create(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            include=include,
+            additional_instructions=additional_instructions,
+            additional_messages=additional_messages,
+            instructions=instructions,
+            max_completion_tokens=max_completion_tokens,
+            max_prompt_tokens=max_prompt_tokens,
+            metadata=metadata,
+            model=model,
+            response_format=response_format,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            parallel_tool_calls=parallel_tool_calls,
+            reasoning_effort=reasoning_effort,
+            # We assume we are not streaming when polling
+            stream=False,
+            tools=tools,
+            truncation_strategy=truncation_strategy,
+            top_p=top_p,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return await self.poll(
+            run.id,
+            thread_id=thread_id,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            poll_interval_ms=poll_interval_ms,
+            timeout=timeout,
+        )
+
+    @overload
+    @typing_extensions.deprecated("use `stream` instead")
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
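As a usage sketch for the async create_and_poll helper added above (not part of this patch; the client setup and IDs below are placeholder assumptions), the method issues the create request with stream=False and then delegates to poll until the run reaches a terminal state:

    import asyncio

    from openai import AsyncOpenAI

    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment

    async def main() -> None:
        # Create the run and block until it reaches a terminal state
        # (requires_action, cancelled, completed, failed, expired, or incomplete).
        run = await client.beta.threads.runs.create_and_poll(
            thread_id="thread_123",  # placeholder IDs
            assistant_id="asst_123",
            poll_interval_ms=500,  # optional; otherwise the server-suggested interval is used
        )
        print(run.status)

    asyncio.run(main())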
+ + @overload + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... + + @typing_extensions.deprecated("use `stream` instead") + def create_and_stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + "parallel_tool_calls": parallel_tool_calls, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) + + async def poll( + self, + run_id: str, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, ) -> Run: """ - Modifies a run. + A helper to poll a run status until it reaches a terminal state. More + information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - Args: - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. 
+ if is_given(poll_interval_ms): + extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - extra_headers: Send extra headers + terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} + while True: + response = await self.with_raw_response.retrieve( + thread_id=thread_id, + run_id=run_id, + extra_headers=extra_headers, + extra_body=extra_body, + extra_query=extra_query, + timeout=timeout, + ) - extra_query: Add additional query parameters to the request + run = response.parse() + # Return if we reached a terminal state + if run.status in terminal_states: + return run - extra_body: Add additional JSON properties to the request + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs/{run_id}", - body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), + await self._sleep(poll_interval_ms / 1000) + + @overload + def stream( + self, + *, + assistant_id: str, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a Run stream""" + ... 
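To illustrate the stream helper defined above (again a sketch, not part of this patch; IDs are placeholders), the returned AsyncAssistantStreamManager is meant to be used as an async context manager, which opens the server-sent-event connection and yields an event handler:

    import asyncio

    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        async with client.beta.threads.runs.stream(
            thread_id="thread_123",  # placeholder IDs
            assistant_id="asst_123",
        ) as stream:
            # Convenience iterator over just the message text deltas.
            async for text in stream.text_deltas:
                print(text, end="", flush=True)
            print()

    asyncio.run(main())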
+ + @overload + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a Run stream""" + ... + + def stream( + self, + *, + assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, + additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a Run stream""" + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( + f"/threads/{thread_id}/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "additional_instructions": additional_instructions, + "additional_messages": additional_messages, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, + "tools": tools, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "truncation_strategy": truncation_strategy, + "top_p": top_p, + }, + run_create_params.RunCreateParams, + ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - def list( + @overload + async def submit_tool_outputs( self, - thread_id: str, + run_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + ) -> Run: """ - Returns a list of runs belonging to a thread. + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. 
For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. + tool_outputs: A list of tools for which the outputs are being submitted. - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. extra_headers: Send extra headers @@ -510,44 +2629,36 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/runs", - page=AsyncCursorPage[Run], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - run_list_params.RunListParams, - ), - ), - model=Run, - ) + ... - async def cancel( + @overload + async def submit_tool_outputs( self, run_id: str, *, thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: + ) -> AsyncStream[AssistantStreamEvent]: """ - Cancels a run that is `in_progress`. + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -556,28 +2667,23 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs/{run_id}/cancel", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) + ... + @overload async def submit_tool_outputs( self, run_id: str, *, thread_id: str, - tool_outputs: List[run_submit_tool_outputs_params.ToolOutput], + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: + ) -> Run | AsyncStream[AssistantStreamEvent]: """ When a run has the `status: "requires_action"` and `required_action.type` is `submit_tool_outputs`, this endpoint can be used to submit the outputs from the @@ -585,6 +2691,10 @@ async def submit_tool_outputs( request. Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + tool_outputs: A list of tools for which the outputs are being submitted. extra_headers: Send extra headers @@ -595,62 +2705,289 @@ async def submit_tool_outputs( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... + + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + async def submit_tool_outputs( + self, + run_id: str, + *, + thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", + body=await async_maybe_transform( + { + "tool_outputs": tool_outputs, + "stream": stream, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming + if stream + else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def submit_tool_outputs_and_poll( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to submit a tool output to a run and poll for a terminal run state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.submit_tool_outputs( + run_id=run_id, + thread_id=thread_id, + tool_outputs=tool_outputs, + stream=False, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.poll( + run_id=run.id, + thread_id=thread_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + poll_interval_ms=poll_interval_ms, + ) + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + @overload + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + ... + + def submit_tool_outputs_stream( + self, + *, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + run_id: str, + thread_id: str, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """ + Submit the tool outputs from a previous run and stream the run to a terminal + state. 
More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", body=maybe_transform( - {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams + { + "tool_outputs": tool_outputs, + "stream": True, + }, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) class RunsWithRawResponse: def __init__(self, runs: Runs) -> None: - self.steps = StepsWithRawResponse(runs.steps) + self._runs = runs - self.create = to_raw_response_wrapper( + self.create = _legacy_response.to_raw_response_wrapper( runs.create, ) - self.retrieve = to_raw_response_wrapper( + self.retrieve = _legacy_response.to_raw_response_wrapper( runs.retrieve, ) - self.update = to_raw_response_wrapper( + self.update = _legacy_response.to_raw_response_wrapper( runs.update, ) - self.list = to_raw_response_wrapper( + self.list = _legacy_response.to_raw_response_wrapper( runs.list, ) - self.cancel = to_raw_response_wrapper( + self.cancel = _legacy_response.to_raw_response_wrapper( runs.cancel, ) - self.submit_tool_outputs = to_raw_response_wrapper( + self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper( runs.submit_tool_outputs, ) + @cached_property + def steps(self) -> StepsWithRawResponse: + return StepsWithRawResponse(self._runs.steps) + class AsyncRunsWithRawResponse: def __init__(self, runs: AsyncRuns) -> None: - self.steps = AsyncStepsWithRawResponse(runs.steps) + self._runs = runs + + self.create = _legacy_response.async_to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + runs.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + runs.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + runs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> AsyncStepsWithRawResponse: + return AsyncStepsWithRawResponse(self._runs.steps) + + +class RunsWithStreamingResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = to_streamed_response_wrapper( + runs.retrieve, + ) + self.update = to_streamed_response_wrapper( + runs.update, + ) + self.list = to_streamed_response_wrapper( + runs.list, + ) + self.cancel = to_streamed_response_wrapper( + runs.cancel, + ) + self.submit_tool_outputs = to_streamed_response_wrapper( + runs.submit_tool_outputs, + ) + + @cached_property + def steps(self) -> 
StepsWithStreamingResponse: + return StepsWithStreamingResponse(self._runs.steps) + + +class AsyncRunsWithStreamingResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs - self.create = async_to_raw_response_wrapper( + self.create = async_to_streamed_response_wrapper( runs.create, ) - self.retrieve = async_to_raw_response_wrapper( + self.retrieve = async_to_streamed_response_wrapper( runs.retrieve, ) - self.update = async_to_raw_response_wrapper( + self.update = async_to_streamed_response_wrapper( runs.update, ) - self.list = async_to_raw_response_wrapper( + self.list = async_to_streamed_response_wrapper( runs.list, ) - self.cancel = async_to_raw_response_wrapper( + self.cancel = async_to_streamed_response_wrapper( runs.cancel, ) - self.submit_tool_outputs = async_to_raw_response_wrapper( + self.submit_tool_outputs = async_to_streamed_response_wrapper( runs.submit_tool_outputs, ) + + @cached_property + def steps(self) -> AsyncStepsWithStreamingResponse: + return AsyncStepsWithStreamingResponse(self._runs.steps) diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py index 4fcc87a0ff..3d2148687b 100644 --- a/src/openai/resources/beta/threads/runs/steps.py +++ b/src/openai/resources/beta/threads/runs/steps.py @@ -1,32 +1,46 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING +from typing import List from typing_extensions import Literal import httpx +from ..... import _legacy_response from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform +from ....._utils import maybe_transform, async_maybe_transform +from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource -from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .....pagination import SyncCursorPage, AsyncCursorPage from ....._base_client import AsyncPaginator, make_request_options -from .....types.beta.threads.runs import RunStep, step_list_params - -if TYPE_CHECKING: - from ....._client import OpenAI, AsyncOpenAI +from .....types.beta.threads.runs import step_list_params, step_retrieve_params +from .....types.beta.threads.runs.run_step import RunStep +from .....types.beta.threads.runs.run_step_include import RunStepInclude __all__ = ["Steps", "AsyncSteps"] class Steps(SyncAPIResource): - with_raw_response: StepsWithRawResponse + @cached_property + def with_raw_response(self) -> StepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return StepsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> StepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = StepsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return StepsWithStreamingResponse(self) def retrieve( self, @@ -34,6 +48,7 @@ def retrieve( *, thread_id: str, run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -45,6 +60,14 @@ def retrieve( Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -53,11 +76,21 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not step_id: + raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) @@ -69,6 +102,7 @@ def list( thread_id: str, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -89,8 +123,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
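A minimal sketch of the new include parameter on the steps resource (illustrative only, not part of this patch; IDs are placeholders), requesting that file search result content be inlined on any file_search tool calls:

    from openai import OpenAI

    client = OpenAI()

    # The only documented `include` value fetches file search result content.
    steps = client.beta.threads.runs.steps.list(
        run_id="run_123",  # placeholder IDs
        thread_id="thread_123",
        include=["step_details.tool_calls[*].file_search.results[*].content"],
    )
    for step in steps:  # the cursor page iterates and paginates automatically
        print(step.id, step.status)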
@@ -106,7 +148,11 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs/{run_id}/steps", page=SyncCursorPage[RunStep], @@ -119,6 +165,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, @@ -130,11 +177,24 @@ def list( class AsyncSteps(AsyncAPIResource): - with_raw_response: AsyncStepsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncStepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncStepsWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncStepsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncStepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncStepsWithStreamingResponse(self) async def retrieve( self, @@ -142,6 +202,7 @@ async def retrieve( *, thread_id: str, run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -153,6 +214,14 @@ async def retrieve( Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -161,11 +230,21 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not step_id: + raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) @@ -177,6 +256,7 @@ def list( thread_id: str, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -197,8 +277,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
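The async variant works the same way; here is a sketch of retrieving a single run step with the include option (placeholder IDs, assumes OPENAI_API_KEY in the environment):

    import asyncio

    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        step = await client.beta.threads.runs.steps.retrieve(
            step_id="step_123",  # placeholder IDs
            thread_id="thread_123",
            run_id="run_123",
            include=["step_details.tool_calls[*].file_search.results[*].content"],
        )
        print(step.step_details)

    asyncio.run(main())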
@@ -214,7 +302,11 @@ def list( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get_api_list( f"/threads/{thread_id}/runs/{run_id}/steps", page=AsyncCursorPage[RunStep], @@ -227,6 +319,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, @@ -239,19 +332,47 @@ def list( class StepsWithRawResponse: def __init__(self, steps: Steps) -> None: - self.retrieve = to_raw_response_wrapper( + self._steps = steps + + self.retrieve = _legacy_response.to_raw_response_wrapper( steps.retrieve, ) - self.list = to_raw_response_wrapper( + self.list = _legacy_response.to_raw_response_wrapper( steps.list, ) class AsyncStepsWithRawResponse: def __init__(self, steps: AsyncSteps) -> None: - self.retrieve = async_to_raw_response_wrapper( + self._steps = steps + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + steps.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + steps.list, + ) + + +class StepsWithStreamingResponse: + def __init__(self, steps: Steps) -> None: + self._steps = steps + + self.retrieve = to_streamed_response_wrapper( + steps.retrieve, + ) + self.list = to_streamed_response_wrapper( + steps.list, + ) + + +class AsyncStepsWithStreamingResponse: + def __init__(self, steps: AsyncSteps) -> None: + self._steps = steps + + self.retrieve = async_to_streamed_response_wrapper( steps.retrieve, ) - self.list = async_to_raw_response_wrapper( + self.list = async_to_streamed_response_wrapper( steps.list, ) diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 9469fc0513..13d8cb6411 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -1,54 +1,97 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, List, Optional +from typing import Union, Iterable, Optional +from functools import partial +from typing_extensions import Literal, overload import httpx -from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse +from .... 
import _legacy_response from .messages import ( Messages, AsyncMessages, MessagesWithRawResponse, AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, ) from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import maybe_transform +from ...._utils import required_args, maybe_transform, async_maybe_transform +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream from ....types.beta import ( - Thread, - ThreadDeleted, thread_create_params, thread_update_params, thread_create_and_run_params, ) from ...._base_client import make_request_options -from ....types.beta.threads import Run - -if TYPE_CHECKING: - from ...._client import OpenAI, AsyncOpenAI +from ....lib.streaming import ( + AssistantEventHandler, + AssistantEventHandlerT, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantEventHandlerT, + AsyncAssistantStreamManager, +) +from ....types.beta.thread import Thread +from ....types.beta.threads.run import Run +from ....types.shared.chat_model import ChatModel +from ....types.beta.thread_deleted import ThreadDeleted +from ....types.shared_params.metadata import Metadata +from ....types.beta.assistant_tool_param import AssistantToolParam +from ....types.beta.assistant_stream_event import AssistantStreamEvent +from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["Threads", "AsyncThreads"] class Threads(SyncAPIResource): - runs: Runs - messages: Messages - with_raw_response: ThreadsWithRawResponse + @cached_property + def runs(self) -> Runs: + return Runs(self._client) + + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> ThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.runs = Runs(client) - self.messages = Messages(client) - self.with_raw_response = ThreadsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ThreadsWithStreamingResponse(self) def create( self, *, - messages: List[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -64,9 +107,16 @@ def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. extra_headers: Send extra headers @@ -76,13 +126,14 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( "/threads", body=maybe_transform( { "messages": messages, "metadata": metadata, + "tool_resources": tool_resources, }, thread_create_params.ThreadCreateParams, ), @@ -115,7 +166,9 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._get( f"/threads/{thread_id}", options=make_request_options( @@ -128,7 +181,8 @@ def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -141,9 +195,16 @@ def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. 
Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. extra_headers: Send extra headers @@ -153,10 +214,18 @@ def update( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/threads/{thread_id}", - body=maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams), + body=maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -186,7 +255,9 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._delete( f"/threads/{thread_id}", options=make_request_options( @@ -195,15 +266,26 @@ def delete( cast_to=ThreadDeleted, ) + @overload def create_and_run( self, *, assistant_id: str, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tools: Optional[List[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -222,21 +304,357 @@ def create_and_run( instructions: Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. 
This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. 
Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. 
If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. - thread: If no thread is provided, an empty thread will be created. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. 
Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -245,17 +663,237 @@ def create_and_run( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | Stream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( "/threads/runs", body=maybe_transform( { "assistant_id": assistant_id, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParamsStreaming + if stream + else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=Stream[AssistantStreamEvent], + ) + + def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. 
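A minimal sketch of how the merged `create_and_run` implementation behaves at the call site, assuming an existing assistant (the `asst_123` ID and message content are placeholders): without `stream` it returns a `Run`, and with `stream=True` it yields `AssistantStreamEvent`s.

from openai import OpenAI

client = OpenAI()

# Non-streaming: a Run object is returned immediately (typically still "queued").
run = client.beta.threads.create_and_run(
    assistant_id="asst_123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "What is 2 + 2?"}]},
)
print(run.id, run.status)

# Streaming: a Stream[AssistantStreamEvent] of server-sent events until a terminal state.
events = client.beta.threads.create_and_run(
    assistant_id="asst_123",
    thread={"messages": [{"role": "user", "content": "What is 2 + 2?"}]},
    stream=True,
)
for event in events:
    print(event.event)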
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + parallel_tool_calls=parallel_tool_calls, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
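The two helpers wrapped around `create_and_run` can be exercised roughly as follows (placeholder IDs and messages, reusing the `client` from the previous sketch): `create_and_run_poll` blocks until a terminal state, while `create_and_run_stream` hands events to an `AssistantStreamManager`.

run = client.beta.threads.create_and_run_poll(
    assistant_id="asst_123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "Hello"}]},
    poll_interval_ms=500,
)
print(run.status)  # e.g. "completed", "requires_action", "failed"

with client.beta.threads.create_and_run_stream(
    assistant_id="asst_123",
    thread={"messages": [{"role": "user", "content": "Hello"}]},
) as stream:
    for event in stream:
        print(event.event)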
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... + + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + make_request = partial( + self._post, + "/threads/runs", + body=maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "thread": thread, "tools": tools, + "tool_resources": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, }, thread_create_and_run_params.ThreadCreateAndRunParams, ), @@ -263,25 +901,46 @@ def create_and_run( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=Stream[AssistantStreamEvent], ) + return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) class AsyncThreads(AsyncAPIResource): - runs: AsyncRuns - messages: AsyncMessages - with_raw_response: AsyncThreadsWithRawResponse + @cached_property + def runs(self) -> AsyncRuns: + return AsyncRuns(self._client) + + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.runs = AsyncRuns(client) - self.messages = AsyncMessages(client) - self.with_raw_response = AsyncThreadsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncThreadsWithStreamingResponse(self) async def create( self, *, - messages: List[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -297,9 +956,16 @@ async def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. extra_headers: Send extra headers @@ -309,13 +975,14 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( "/threads", - body=maybe_transform( + body=await async_maybe_transform( { "messages": messages, "metadata": metadata, + "tool_resources": tool_resources, }, thread_create_params.ThreadCreateParams, ), @@ -348,7 +1015,9 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._get( f"/threads/{thread_id}", options=make_request_options( @@ -361,7 +1030,8 @@ async def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -374,9 +1044,16 @@ async def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + tool_resources: A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
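On the async side the shape is the same; a small sketch of `create` with the new `tool_resources` and typed `metadata` parameters (the file and vector store IDs are placeholders):

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    thread = await client.beta.threads.create(
        messages=[{"role": "user", "content": "Summarise the attached report."}],
        metadata={"source": "sdk-example"},  # up to 16 string key/value pairs
        tool_resources={
            "code_interpreter": {"file_ids": ["file_abc"]},  # placeholder file ID
            "file_search": {"vector_store_ids": ["vs_abc"]},  # placeholder vector store ID
        },
    )
    print(thread.id)


asyncio.run(main())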
extra_headers: Send extra headers @@ -386,10 +1063,18 @@ async def update( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/threads/{thread_id}", - body=maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams), + body=await async_maybe_transform( + { + "metadata": metadata, + "tool_resources": tool_resources, + }, + thread_update_params.ThreadUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -419,7 +1104,9 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._delete( f"/threads/{thread_id}", options=make_request_options( @@ -428,15 +1115,26 @@ async def delete( cast_to=ThreadDeleted, ) + @overload async def create_and_run( self, *, assistant_id: str, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tools: Optional[List[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -455,21 +1153,357 @@ async def create_and_run( instructions: Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. 
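The path-parameter guards added to `retrieve`, `update` and `delete` fail fast on the client, so an empty ID never reaches the wire; for example (sync client shown, the async methods behave the same):

from openai import OpenAI

client = OpenAI()

try:
    client.beta.threads.retrieve("")
except ValueError as exc:
    print(exc)  # Expected a non-empty value for `thread_id` but received ''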
+ + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. 
For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: Literal[True], + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. 
+ The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. 
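A sketch of the async streaming overload described above, assuming a placeholder assistant ID; with `stream=True` the awaited call resolves to an `AsyncStream[AssistantStreamEvent]` consumed with `async for`:

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.beta.threads.create_and_run(
        assistant_id="asst_123",  # placeholder assistant ID
        thread={"messages": [{"role": "user", "content": "Hello"}]},
        stream=True,
    )
    async for event in stream:
        print(event.event)


asyncio.run(main())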
+ + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create_and_run( + self, + *, + assistant_id: str, + stream: bool, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a thread and run it in one request. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + instructions: Override the default system message of the assistant. This is useful for + modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. 
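The token budgets and truncation controls documented above combine roughly like this (sync client shown for brevity; the `last_messages` strategy shape is assumed from the Assistants v2 API and the IDs are placeholders):

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "Summarise our conversation."}]},
    max_prompt_tokens=2000,
    max_completion_tokens=500,
    # assumed shape: keep only the most recent messages when truncating
    truncation_strategy={"type": "last_messages", "last_messages": 4},
)
if run.status == "incomplete":
    print(run.incomplete_details)  # which limit ended the run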
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. - thread: If no thread is provided, an empty thread will be created. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tool_resources: A set of resources that are used by the assistant's tools. The resources are + specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store + IDs. tools: Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. 
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -478,17 +1512,241 @@ async def create_and_run( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})} + ... + + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create_and_run( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run | AsyncStream[AssistantStreamEvent]: + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( + "/threads/runs", + body=await async_maybe_transform( + { + "assistant_id": assistant_id, + "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "stream": stream, + "temperature": temperature, + "thread": thread, + "tool_choice": tool_choice, + "tool_resources": tool_resources, + "tools": tools, + "top_p": top_p, + "truncation_strategy": truncation_strategy, + }, + thread_create_and_run_params.ThreadCreateAndRunParamsStreaming + if stream + else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + stream=stream or False, + stream_cls=AsyncStream[AssistantStreamEvent], + ) + + async def create_and_run_poll( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + A helper to create a thread, start a run and then poll for a terminal state. 
+ More information on Run lifecycles can be found here: + https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + """ + run = await self.create_and_run( + assistant_id=assistant_id, + instructions=instructions, + max_completion_tokens=max_completion_tokens, + max_prompt_tokens=max_prompt_tokens, + metadata=metadata, + model=model, + parallel_tool_calls=parallel_tool_calls, + response_format=response_format, + temperature=temperature, + stream=False, + thread=thread, + tool_resources=tool_resources, + tool_choice=tool_choice, + truncation_strategy=truncation_strategy, + top_p=top_p, + tools=tools, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return await self.runs.poll( + run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms + ) + + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: + """Create a thread and stream the run back""" + ... 
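# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of the diff): one way the async
# helpers added above might be used. The assistant id "asst_123" and the
# prompt are placeholders; `create_and_run_poll`, `create_and_run_stream`,
# `AsyncAssistantEventHandler` and `stream.until_done()` are the helpers and
# types referenced in this file.
import asyncio

from openai import AsyncOpenAI, AsyncAssistantEventHandler


class PrintingHandler(AsyncAssistantEventHandler):
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print streamed text fragments as they arrive.
        print(delta.value or "", end="", flush=True)


async def main() -> None:
    client = AsyncOpenAI()

    # Poll helper: creates a thread, starts the run, then polls until the
    # run reaches a terminal state.
    run = await client.beta.threads.create_and_run_poll(
        assistant_id="asst_123",
        thread={"messages": [{"role": "user", "content": "Hello!"}]},
    )
    print(run.status)

    # Stream helper: returns an AsyncAssistantStreamManager usable as an
    # async context manager; events are dispatched to the handler.
    async with client.beta.threads.create_and_run_stream(
        assistant_id="asst_123",
        thread={"messages": [{"role": "user", "content": "Hello!"}]},
        event_handler=PrintingHandler(),
    ) as stream:
        await stream.until_done()


asyncio.run(main())
# ---------------------------------------------------------------------------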
+ + @overload + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: + """Create a thread and stream the run back""" + ... + + def create_and_run_stream( + self, + *, + assistant_id: str, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, + tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + event_handler: AsyncAssistantEventHandlerT | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ( + AsyncAssistantStreamManager[AsyncAssistantEventHandler] + | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] + ): + """Create a thread and stream the run back""" + extra_headers = { + "OpenAI-Beta": "assistants=v2", + "X-Stainless-Stream-Helper": "threads.create_and_run_stream", + "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", + **(extra_headers or {}), + } + request = self._post( "/threads/runs", body=maybe_transform( { "assistant_id": assistant_id, "instructions": instructions, + "max_completion_tokens": max_completion_tokens, + "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "response_format": response_format, + "temperature": temperature, + "tool_choice": tool_choice, + "stream": True, "thread": thread, "tools": tools, + "tool_resources": tool_resources, + "truncation_strategy": truncation_strategy, + "top_p": top_p, }, thread_create_and_run_params.ThreadCreateAndRunParams, ), @@ -496,48 +1754,123 @@ async def create_and_run( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, + stream=True, + stream_cls=AsyncStream[AssistantStreamEvent], ) + return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) class ThreadsWithRawResponse: def __init__(self, threads: Threads) -> None: - self.runs = RunsWithRawResponse(threads.runs) - self.messages = MessagesWithRawResponse(threads.messages) + self._threads = threads - self.create = to_raw_response_wrapper( + self.create = _legacy_response.to_raw_response_wrapper( threads.create, ) - self.retrieve = to_raw_response_wrapper( + self.retrieve = _legacy_response.to_raw_response_wrapper( threads.retrieve, ) - self.update = to_raw_response_wrapper( + self.update = _legacy_response.to_raw_response_wrapper( threads.update, ) - self.delete = to_raw_response_wrapper( + self.delete = _legacy_response.to_raw_response_wrapper( threads.delete, ) - self.create_and_run = to_raw_response_wrapper( + self.create_and_run = _legacy_response.to_raw_response_wrapper( threads.create_and_run, ) + @cached_property + def runs(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._threads.messages) + class AsyncThreadsWithRawResponse: def __init__(self, threads: AsyncThreads) -> None: - self.runs = AsyncRunsWithRawResponse(threads.runs) - self.messages = AsyncMessagesWithRawResponse(threads.messages) + self._threads = threads - self.create = async_to_raw_response_wrapper( + self.create = _legacy_response.async_to_raw_response_wrapper( threads.create, ) - self.retrieve = async_to_raw_response_wrapper( + self.retrieve = _legacy_response.async_to_raw_response_wrapper( threads.retrieve, ) - self.update = async_to_raw_response_wrapper( + self.update = _legacy_response.async_to_raw_response_wrapper( threads.update, ) - self.delete = async_to_raw_response_wrapper( + self.delete = _legacy_response.async_to_raw_response_wrapper( threads.delete, ) - self.create_and_run = async_to_raw_response_wrapper( + self.create_and_run = _legacy_response.async_to_raw_response_wrapper( threads.create_and_run, ) + + @cached_property + def runs(self) -> 
AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._threads.messages) + + +class ThreadsWithStreamingResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.create = to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = to_streamed_response_wrapper( + threads.update, + ) + self.delete = to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._threads.messages) + + +class AsyncThreadsWithStreamingResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.create = async_to_streamed_response_wrapper( + threads.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + threads.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + threads.update, + ) + self.delete = async_to_streamed_response_wrapper( + threads.delete, + ) + self.create_and_run = async_to_streamed_response_wrapper( + threads.create_and_run, + ) + + @cached_property + def runs(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self._threads.runs) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._threads.messages) diff --git a/src/openai/resources/chat/__init__.py b/src/openai/resources/chat/__init__.py index 2e56c0cbfa..52dfdceacc 100644 --- a/src/openai/resources/chat/__init__.py +++ b/src/openai/resources/chat/__init__.py @@ -1,11 +1,20 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from .chat import Chat, AsyncChat, ChatWithRawResponse, AsyncChatWithRawResponse +from .chat import ( + Chat, + AsyncChat, + ChatWithRawResponse, + AsyncChatWithRawResponse, + ChatWithStreamingResponse, + AsyncChatWithStreamingResponse, +) from .completions import ( Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, ) __all__ = [ @@ -13,8 +22,12 @@ "AsyncCompletions", "CompletionsWithRawResponse", "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", "Chat", "AsyncChat", "ChatWithRawResponse", "AsyncChatWithRawResponse", + "ChatWithStreamingResponse", + "AsyncChatWithStreamingResponse", ] diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py index 3847b20512..14f9224b41 100644 --- a/src/openai/resources/chat/chat.py +++ b/src/openai/resources/chat/chat.py @@ -1,48 +1,102 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import TYPE_CHECKING - +from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource -from .completions import ( +from .completions.completions import ( Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, ) -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI - __all__ = ["Chat", "AsyncChat"] class Chat(SyncAPIResource): - completions: Completions - with_raw_response: ChatWithRawResponse + @cached_property + def completions(self) -> Completions: + return Completions(self._client) + + @cached_property + def with_raw_response(self) -> ChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.completions = Completions(client) - self.with_raw_response = ChatWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ChatWithStreamingResponse(self) class AsyncChat(AsyncAPIResource): - completions: AsyncCompletions - with_raw_response: AsyncChatWithRawResponse + @cached_property + def completions(self) -> AsyncCompletions: + return AsyncCompletions(self._client) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.completions = AsyncCompletions(client) - self.with_raw_response = AsyncChatWithRawResponse(self) + @cached_property + def with_raw_response(self) -> AsyncChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncChatWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncChatWithStreamingResponse(self) class ChatWithRawResponse: def __init__(self, chat: Chat) -> None: - self.completions = CompletionsWithRawResponse(chat.completions) + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithRawResponse: + return CompletionsWithRawResponse(self._chat.completions) class AsyncChatWithRawResponse: def __init__(self, chat: AsyncChat) -> None: - self.completions = AsyncCompletionsWithRawResponse(chat.completions) + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithRawResponse: + return AsyncCompletionsWithRawResponse(self._chat.completions) + + +class ChatWithStreamingResponse: + def __init__(self, chat: Chat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsWithStreamingResponse: + return CompletionsWithStreamingResponse(self._chat.completions) + + +class AsyncChatWithStreamingResponse: + def __init__(self, chat: AsyncChat) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsWithStreamingResponse: + return AsyncCompletionsWithStreamingResponse(self._chat.completions) diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py deleted file mode 100644 index d0657b2f73..0000000000 --- a/src/openai/resources/chat/completions.py +++ /dev/null @@ -1,1244 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import TYPE_CHECKING, Dict, List, Union, Optional, overload -from typing_extensions import Literal - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import required_args, maybe_transform -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ..._streaming import Stream, AsyncStream -from ...types.chat import ( - ChatCompletion, - ChatCompletionChunk, - ChatCompletionToolParam, - ChatCompletionMessageParam, - ChatCompletionToolChoiceOptionParam, - completion_create_params, -) -from ..._base_client import make_request_options - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI - -__all__ = ["Completions", "AsyncCompletions"] - - -class Completions(SyncAPIResource): - with_raw_response: CompletionsWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = CompletionsWithRawResponse(self) - - @overload - def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = 
NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. 
- - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
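# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of the diff): the JSON-mode behaviour
# described in the (removed) docstring above -- setting
# `response_format={"type": "json_object"}` must be paired with an explicit
# instruction to produce JSON in a system or user message. The model name and
# prompts are placeholders.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant that replies in JSON."},
        {"role": "user", "content": "Give me three primary colors as a JSON object."},
    ],
    response_format={"type": "json_object"},
)
print(completion.choices[0].message.content)
# ---------------------------------------------------------------------------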
- - @overload - def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. 
- - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. 
- - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. 
Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. 
- - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - return self._post( - "/chat/completions", - body=maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], - ) - - -class AsyncCompletions(AsyncAPIResource): - with_raw_response: AsyncCompletionsWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncCompletionsWithRawResponse(self) - - @overload - async def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. 
- - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. 
Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. 
- - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
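Editor's note: the async streaming overload removed above returns an AsyncStream of ChatCompletionChunk objects rather than a single ChatCompletion. A minimal usage sketch, assuming an OPENAI_API_KEY in the environment and an illustrative model name from the Literal list above:

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    stream = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    # Each chunk carries a delta; content can be None on role/finish chunks.
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())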
- - @overload - async def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto`` is the default if - functions are present. 
- - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) - - response_format: An object specifying the format that the model must output. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. 
- - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - async def create( - self, - *, - messages: List[ChatCompletionMessageParam], - model: Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - return await self._post( - "/chat/completions", - body=maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], - ) - - -class CompletionsWithRawResponse: - def __init__(self, completions: Completions) -> None: - self.create = to_raw_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsWithRawResponse: - def __init__(self, completions: AsyncCompletions) -> None: - self.create = async_to_raw_response_wrapper( - completions.create, - ) diff --git a/src/openai/resources/chat/completions/__init__.py b/src/openai/resources/chat/completions/__init__.py new file mode 100644 index 0000000000..12d3b3aa28 --- /dev/null +++ b/src/openai/resources/chat/completions/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", +] diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py new file mode 100644 index 0000000000..0ab105a389 --- /dev/null +++ b/src/openai/resources/chat/completions/completions.py @@ -0,0 +1,2361 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import inspect +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx +import pydantic + +from .... 
import _legacy_response +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import required_args, maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.chat import ( + ChatCompletionAudioParam, + completion_list_params, + completion_create_params, + completion_update_params, +) +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared.chat_model import ChatModel +from ....types.chat.chat_completion import ChatCompletion +from ....types.shared_params.metadata import Metadata +from ....types.shared.reasoning_effort import ReasoningEffort +from ....types.chat.chat_completion_chunk import ChatCompletionChunk +from ....types.chat.chat_completion_deleted import ChatCompletionDeleted +from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. 
Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. 
+ + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
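Editor's note: a minimal synchronous sketch of the non-streaming overload added above, combining a few of the documented parameters (the model name and token limit are illustrative only, not defaults):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        # With JSON mode the prompt should ask for JSON explicitly.
        {"role": "system", "content": "Reply with a JSON object."},
        {"role": "user", "content": "List three primary colors."},
    ],
    response_format={"type": "json_object"},
    max_completion_tokens=200,
    temperature=0.2,
)
print(completion.choices[0].message.content)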
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. 
+ + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
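Editor's note: a short sketch of the synchronous streaming overload added above. It assumes `stream_options` accepts an `include_usage` flag that makes the API send a final usage-only chunk, per the stream options parameter documented here:

from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku about the sea."}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    # The final chunk carries usage only, so its choices list can be empty.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    elif chunk.usage is not None:
        print(f"\n[{chunk.usage.total_tokens} tokens]")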
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'.
+ + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
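+
+    # Illustrative usage (a minimal sketch, assuming a configured `client = OpenAI()`;
+    # the overloads above only narrow the static return type based on `stream`):
+    #
+    #     completion = client.chat.completions.create(
+    #         model="gpt-4o",
+    #         messages=[{"role": "user", "content": "Hello"}],
+    #     )  # ChatCompletion
+    #
+    #     stream = client.chat.completions.create(
+    #         model="gpt-4o",
+    #         messages=[{"role": "user", "content": "Hello"}],
+    #         stream=True,
+    #     )  # Stream[ChatCompletionChunk]
+    #     for chunk in stream:
+    #         ...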
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + validate_response_format(response_format) + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. 
Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._post( + f"/chat/completions/{completion_id}", + body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ChatCompletion]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=SyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + 
tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
+ + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. 
+ + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability.
+ `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
+ + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. 
+ + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse.
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. 
Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. 
If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + validate_response_format(response_format) + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + async def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. 
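For orientation, a minimal usage sketch of the async `chat.completions.create` method assembled above, in both non-streaming and streaming form. The model name, prompt, and use of `store=True` are illustrative assumptions, not taken from this patch.

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment


async def main() -> None:
    # Without stream=True the call resolves to a ChatCompletion.
    completion = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Say hello"}],
        store=True,  # needed later for retrieve/update/list/delete
    )
    print(completion.choices[0].message.content)

    # With stream=True the same call returns an AsyncStream[ChatCompletionChunk].
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())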
Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._post( + f"/chat/completions/{completion_id}", + body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=AsyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + async def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
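A short sketch of the stored-completion helpers documented above (`list`, `retrieve`, `update`); the completion id and metadata values are illustrative, and the calls assume the completions were previously created with `store=True`.

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # Auto-pagination over stored Chat Completions (AsyncCursorPage under the hood).
    async for completion in client.chat.completions.list(limit=10, order="desc"):
        print(completion.id)

    # Fetch a single stored completion, then attach metadata to it.
    stored = await client.chat.completions.retrieve("chatcmpl-abc123")
    updated = await client.chat.completions.update(
        "chatcmpl-abc123",
        metadata={"project": "demo"},
    )
    print(stored.id, updated.id)


asyncio.run(main())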
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._completions.messages) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._completions.messages) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = to_streamed_response_wrapper( + completions.update, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) + self.delete = to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._completions.messages) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + completions.update, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) + self.delete = async_to_streamed_response_wrapper( + 
completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._completions.messages) + + +def validate_response_format(response_format: object) -> None: + if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel): + raise TypeError( + "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `beta.chat.completions.parse()` instead" + ) diff --git a/src/openai/resources/chat/completions/messages.py b/src/openai/resources/chat/completions/messages.py new file mode 100644 index 0000000000..fac15fba8b --- /dev/null +++ b/src/openai/resources/chat/completions/messages.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.chat.completions import message_list_params +from ....types.chat.chat_completion_store_message import ChatCompletionStoreMessage + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return MessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ChatCompletionStoreMessage]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. 
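The `validate_response_format` helper added above (just before the new messages resource) rejects Pydantic model classes before any request is sent; a minimal sketch of that behaviour, with the model name and schema as illustrative assumptions. The Structured Outputs path goes through `beta.chat.completions.parse` instead.

from pydantic import BaseModel

from openai import OpenAI

client = OpenAI()


class Step(BaseModel):
    explanation: str
    output: str


try:
    # Rejected client-side with a TypeError by validate_response_format.
    client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Think step by step"}],
        response_format=Step,  # type: ignore[arg-type]
    )
except TypeError as exc:
    print(exc)

# The supported path for BaseModel response formats:
parsed = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Think step by step"}],
    response_format=Step,
)
print(parsed.choices[0].message.parsed)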
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=SyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncMessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ChatCompletionStoreMessage, AsyncCursorPage[ChatCompletionStoreMessage]]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=AsyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = to_streamed_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = async_to_streamed_response_wrapper( + messages.list, + ) diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index baf6f04fef..43b923b9b9 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -1,53 +1,55 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, Dict, List, Union, Optional, overload -from typing_extensions import Literal +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx -from ..types import Completion, completion_create_params +from .. import _legacy_response +from ..types import completion_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import required_args, maybe_transform +from .._utils import required_args, maybe_transform, async_maybe_transform +from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._streaming import Stream, AsyncStream -from .._base_client import make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from .._base_client import ( + make_request_options, +) +from ..types.completion import Completion +from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam __all__ = ["Completions", "AsyncCompletions"] class Completions(SyncAPIResource): - with_raw_response: CompletionsWithRawResponse + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
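A usage sketch for the new `messages` sub-resource defined above: paging through the messages of a stored chat completion. The completion id is an illustrative placeholder.

from openai import OpenAI

client = OpenAI()

# Iterating the SyncCursorPage[ChatCompletionStoreMessage] fetches further pages as needed.
for message in client.chat.completions.messages.list(
    "chatcmpl-abc123",
    limit=50,
    order="asc",
):
    print(message.id, message.content)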
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = CompletionsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) @overload def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -59,6 +61,7 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -77,8 +80,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -103,30 +106,30 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. 
The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -143,7 +146,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -152,7 +155,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -162,8 +167,12 @@ def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -178,7 +187,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
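A minimal sketch of the (legacy) text completions endpoint whose parameters are documented above; the prompt and parameter values are illustrative.

from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Write a haiku about type checkers.",
    max_tokens=64,
    temperature=0.7,
    logprobs=1,     # include log probabilities for the most likely token
    stop=["\n\n"],  # up to 4 stop sequences
)
print(completion.choices[0].text)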
extra_headers: Send extra headers @@ -194,22 +203,8 @@ def create( def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -221,6 +216,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -239,8 +235,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -272,30 +268,30 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. 
+ logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -312,7 +308,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -321,11 +317,17 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -340,7 +342,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -356,22 +358,8 @@ def create( def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -383,6 +371,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -401,8 +390,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -434,30 +423,30 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. 
+ logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -474,7 +463,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -483,11 +472,17 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -502,7 +497,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -518,22 +513,8 @@ def create( def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -545,6 +526,7 @@ def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -573,12 +555,15 @@ def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "suffix": suffix, "temperature": temperature, "top_p": top_p, "user": user, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -590,32 +575,31 @@ def create( class AsyncCompletions(AsyncAPIResource): - with_raw_response: AsyncCompletionsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncCompletionsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
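For context, a sketch of the `with_raw_response` and `with_streaming_response` accessors that replace the old constructor wiring above; the prompts are illustrative.

from openai import OpenAI

client = OpenAI()

# .with_raw_response exposes the HTTP response (headers, status) plus .parse().
raw = client.completions.with_raw_response.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say hi",
)
print(raw.headers.get("x-request-id"))
completion = raw.parse()  # the usual Completion object
print(completion.choices[0].text)

# .with_streaming_response defers reading the body until it is consumed.
with client.completions.with_streaming_response.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say hi",
) as response:
    print(response.headers.get("content-type"))
    for line in response.iter_lines():
        print(line)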
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) @overload async def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -627,6 +611,7 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -645,8 +630,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -671,30 +656,30 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. 
The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. + logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -711,7 +696,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -720,7 +705,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -730,8 +717,12 @@ async def create( message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -746,7 +737,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -762,22 +753,8 @@ async def create( async def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -789,6 +766,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -807,8 +785,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -840,30 +818,30 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. 
+ logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -880,7 +858,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -889,11 +867,17 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -908,7 +892,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -924,22 +908,8 @@ async def create( async def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, @@ -951,6 +921,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -969,8 +940,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -1002,30 +973,30 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. - logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. 
+ logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. - max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -1042,7 +1013,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -1051,11 +1022,17 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + stream_options: Options for streaming response. Only set this when you set `stream: true`. + suffix: The suffix that comes after a completion of inserted text. + This parameter is only supported for `gpt-3.5-turbo-instruct`. + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. @@ -1070,7 +1047,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -1086,22 +1063,8 @@ async def create( async def create( self, *, - model: Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ], - prompt: Union[str, List[str], List[int], List[List[int]], None], + model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], best_of: Optional[int] | NotGiven = NOT_GIVEN, echo: Optional[bool] | NotGiven = NOT_GIVEN, frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, @@ -1113,6 +1076,7 @@ async def create( seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -1126,7 +1090,7 @@ async def create( ) -> Completion | AsyncStream[Completion]: return await self._post( "/completions", - body=maybe_transform( + body=await async_maybe_transform( { "model": model, "prompt": prompt, @@ -1141,12 +1105,15 @@ async def create( "seed": seed, "stop": stop, "stream": stream, + "stream_options": stream_options, "suffix": suffix, "temperature": temperature, "top_p": top_p, "user": user, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -1159,13 +1126,35 @@ async def create( class CompletionsWithRawResponse: def __init__(self, completions: Completions) -> None: - self.create = to_raw_response_wrapper( + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( completions.create, ) class AsyncCompletionsWithRawResponse: def __init__(self, completions: AsyncCompletions) -> None: - self.create = async_to_raw_response_wrapper( + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( completions.create, ) diff --git a/src/openai/resources/edits.py b/src/openai/resources/edits.py deleted file mode 100644 index eafaa82fdf..0000000000 --- a/src/openai/resources/edits.py +++ /dev/null @@ -1,193 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
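The completions resource above now accepts a `stream_options` parameter and routes the request body through `CompletionCreateParamsStreaming` or `CompletionCreateParamsNonStreaming` depending on `stream`. Below is a minimal usage sketch of the async client; it is not part of the diff, and the `include_usage` option and model choice are assumptions based on the SDK's documented defaults.

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def main() -> None:
    # Non-streaming call: the body is validated against CompletionCreateParamsNonStreaming.
    completion = await client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt="Say this is a test",
        max_tokens=16,
    )
    print(completion.choices[0].text)

    # Streaming call: stream_options is only meaningful together with stream=True.
    stream = await client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt="Say this is a test",
        max_tokens=16,
        stream=True,
        stream_options={"include_usage": True},  # assumed option for ChatCompletionStreamOptionsParam
    )
    async for chunk in stream:
        if chunk.choices:  # a final usage-only chunk has no choices
            print(chunk.choices[0].text, end="", flush=True)

asyncio.run(main())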
- -from __future__ import annotations - -import typing_extensions -from typing import TYPE_CHECKING, Union, Optional -from typing_extensions import Literal - -import httpx - -from ..types import Edit, edit_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from .._base_client import make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI - -__all__ = ["Edits", "AsyncEdits"] - - -class Edits(SyncAPIResource): - with_raw_response: EditsWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = EditsWithRawResponse(self) - - @typing_extensions.deprecated( - "The Edits API is deprecated; please use Chat Completions instead.\n\nhttps://openai.com/blog/gpt-4-api-general-availability#deprecation-of-the-edits-api\n" - ) - def create( - self, - *, - instruction: str, - model: Union[str, Literal["text-davinci-edit-001", "code-davinci-edit-001"]], - input: Optional[str] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Edit: - """ - Creates a new edit for the provided input, instruction, and parameters. - - Args: - instruction: The instruction that tells the model how to edit the prompt. - - model: ID of the model to use. You can use the `text-davinci-edit-001` or - `code-davinci-edit-001` model with this endpoint. - - input: The input text to use as a starting point for the edit. - - n: How many edits to generate for the input and instruction. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/edits", - body=maybe_transform( - { - "instruction": instruction, - "model": model, - "input": input, - "n": n, - "temperature": temperature, - "top_p": top_p, - }, - edit_create_params.EditCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Edit, - ) - - -class AsyncEdits(AsyncAPIResource): - with_raw_response: AsyncEditsWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncEditsWithRawResponse(self) - - @typing_extensions.deprecated( - "The Edits API is deprecated; please use Chat Completions instead.\n\nhttps://openai.com/blog/gpt-4-api-general-availability#deprecation-of-the-edits-api\n" - ) - async def create( - self, - *, - instruction: str, - model: Union[str, Literal["text-davinci-edit-001", "code-davinci-edit-001"]], - input: Optional[str] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Edit: - """ - Creates a new edit for the provided input, instruction, and parameters. - - Args: - instruction: The instruction that tells the model how to edit the prompt. - - model: ID of the model to use. You can use the `text-davinci-edit-001` or - `code-davinci-edit-001` model with this endpoint. - - input: The input text to use as a starting point for the edit. - - n: How many edits to generate for the input and instruction. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/edits", - body=maybe_transform( - { - "instruction": instruction, - "model": model, - "input": input, - "n": n, - "temperature": temperature, - "top_p": top_p, - }, - edit_create_params.EditCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Edit, - ) - - -class EditsWithRawResponse: - def __init__(self, edits: Edits) -> None: - self.create = to_raw_response_wrapper( # pyright: ignore[reportDeprecated] - edits.create # pyright: ignore[reportDeprecated], - ) - - -class AsyncEditsWithRawResponse: - def __init__(self, edits: AsyncEdits) -> None: - self.create = async_to_raw_response_wrapper( # pyright: ignore[reportDeprecated] - edits.create # pyright: ignore[reportDeprecated], - ) diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index c31ad9d931..a392d5eb17 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -1,40 +1,55 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +import array import base64 -from typing import TYPE_CHECKING, List, Union, cast +from typing import List, Union, Iterable, cast from typing_extensions import Literal import httpx -from ..types import CreateEmbeddingResponse, embedding_create_params +from .. import _legacy_response +from ..types import embedding_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven from .._utils import is_given, maybe_transform -from .._extras import numpy as np -from .._extras import has_numpy +from .._compat import cached_property +from .._extras import numpy as np, has_numpy from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._base_client import make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from ..types.embedding_model import EmbeddingModel +from ..types.create_embedding_response import CreateEmbeddingResponse __all__ = ["Embeddings", "AsyncEmbeddings"] class Embeddings(SyncAPIResource): - with_raw_response: EmbeddingsWithRawResponse + @cached_property + def with_raw_response(self) -> EmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return EmbeddingsWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = EmbeddingsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return EmbeddingsWithStreamingResponse(self) def create( self, *, - input: Union[str, List[str], List[int], List[List[int]]], - model: Union[str, Literal["text-embedding-ada-002"]], + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -51,22 +66,27 @@ def create( input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for - `text-embedding-ada-002`) and cannot be an empty string. + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. encoding_format: The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -80,9 +100,10 @@ def create( "input": input, "model": model, "user": user, + "dimensions": dimensions, "encoding_format": encoding_format, } - if not is_given(encoding_format) and has_numpy(): + if not is_given(encoding_format): params["encoding_format"] = "base64" def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: @@ -93,12 +114,14 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() return obj @@ -117,17 +140,31 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: class AsyncEmbeddings(AsyncAPIResource): - with_raw_response: AsyncEmbeddingsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncEmbeddingsWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncEmbeddingsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncEmbeddingsWithStreamingResponse(self) async def create( self, *, - input: Union[str, List[str], List[int], List[List[int]]], - model: Union[str, Literal["text-embedding-ada-002"]], + input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -144,22 +181,27 @@ async def create( input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for - `text-embedding-ada-002`) and cannot be an empty string. + `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. model: ID of the model to use. 
You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + + dimensions: The number of dimensions the resulting output embeddings should have. Only + supported in `text-embedding-3` and later models. encoding_format: The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -173,9 +215,10 @@ async def create( "input": input, "model": model, "user": user, + "dimensions": dimensions, "encoding_format": encoding_format, } - if not is_given(encoding_format) and has_numpy(): + if not is_given(encoding_format): params["encoding_format"] = "base64" def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: @@ -186,12 +229,14 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() + if not has_numpy(): + # use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() return obj @@ -211,13 +256,35 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: class EmbeddingsWithRawResponse: def __init__(self, embeddings: Embeddings) -> None: - self.create = to_raw_response_wrapper( + self._embeddings = embeddings + + self.create = _legacy_response.to_raw_response_wrapper( embeddings.create, ) class AsyncEmbeddingsWithRawResponse: def __init__(self, embeddings: AsyncEmbeddings) -> None: - self.create = async_to_raw_response_wrapper( + self._embeddings = embeddings + + self.create = _legacy_response.async_to_raw_response_wrapper( + embeddings.create, + ) + + +class EmbeddingsWithStreamingResponse: + def __init__(self, embeddings: Embeddings) -> None: + self._embeddings = embeddings + + self.create = to_streamed_response_wrapper( + embeddings.create, + ) + + +class AsyncEmbeddingsWithStreamingResponse: + def __init__(self, embeddings: AsyncEmbeddings) -> None: + self._embeddings = embeddings + + self.create = async_to_streamed_response_wrapper( embeddings.create, ) diff --git a/src/openai/resources/evals/__init__.py b/src/openai/resources/evals/__init__.py new file mode 100644 index 0000000000..84f707511d --- /dev/null +++ b/src/openai/resources/evals/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
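The embeddings changes above add a `dimensions` parameter and make the base64 decode path work without numpy by falling back to the stdlib `array` module. A minimal sketch of how the updated surface might be used follows; the model name and dimension count are illustrative assumptions, not taken from the diff.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# dimensions is only supported by text-embedding-3 and later models (model name assumed).
response = client.embeddings.create(
    model="text-embedding-3-small",
    input=["first document", "second document"],
    dimensions=256,
)

# With encoding_format unset, the SDK now always requests base64 and decodes it
# client-side: numpy.frombuffer when numpy is installed, otherwise array.array("f"),
# so item.embedding is a plain list of floats in both cases.
for item in response.data:
    print(item.index, len(item.embedding))

# The raw-response accessor added in this diff exposes headers alongside the parsed body.
raw = client.embeddings.with_raw_response.create(model="text-embedding-3-small", input="hello")
print(raw.headers.get("x-request-id"), len(raw.parse().data[0].embedding))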
+ +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .evals import ( + Evals, + AsyncEvals, + EvalsWithRawResponse, + AsyncEvalsWithRawResponse, + EvalsWithStreamingResponse, + AsyncEvalsWithStreamingResponse, +) + +__all__ = [ + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", + "Evals", + "AsyncEvals", + "EvalsWithRawResponse", + "AsyncEvalsWithRawResponse", + "EvalsWithStreamingResponse", + "AsyncEvalsWithStreamingResponse", +] diff --git a/src/openai/resources/evals/evals.py b/src/openai/resources/evals/evals.py new file mode 100644 index 0000000000..c12562a86d --- /dev/null +++ b/src/openai/resources/evals/evals.py @@ -0,0 +1,652 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ...types import eval_list_params, eval_create_params, eval_update_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.eval_list_response import EvalListResponse +from ...types.eval_create_response import EvalCreateResponse +from ...types.eval_delete_response import EvalDeleteResponse +from ...types.eval_update_response import EvalUpdateResponse +from ...types.eval_retrieve_response import EvalRetrieveResponse +from ...types.shared_params.metadata import Metadata + +__all__ = ["Evals", "AsyncEvals"] + + +class Evals(SyncAPIResource): + @cached_property + def runs(self) -> Runs: + return Runs(self._client) + + @cached_property + def with_raw_response(self) -> EvalsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return EvalsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EvalsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return EvalsWithStreamingResponse(self) + + def create( + self, + *, + data_source_config: eval_create_params.DataSourceConfig, + testing_criteria: Iterable[eval_create_params.TestingCriterion], + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalCreateResponse: + """ + Create the structure of an evaluation that can be used to test a model's + performance. An evaluation is a set of testing criteria and a datasource. After + creating an evaluation, you can run it on different models and model parameters. + We support several types of graders and datasources. For more information, see + the [Evals guide](https://platform.openai.com/docs/guides/evals). + + Args: + data_source_config: The configuration for the data source used for the evaluation runs. + + testing_criteria: A list of graders for all eval runs in this group. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the evaluation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/evals", + body=maybe_transform( + { + "data_source_config": data_source_config, + "testing_criteria": testing_criteria, + "metadata": metadata, + "name": name, + }, + eval_create_params.EvalCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalCreateResponse, + ) + + def retrieve( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalRetrieveResponse: + """ + Get an evaluation by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalRetrieveResponse, + ) + + def update( + self, + eval_id: str, + *, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalUpdateResponse: + """ + Update certain properties of an evaluation. 
+ + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: Rename the evaluation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._post( + f"/evals/{eval_id}", + body=maybe_transform( + { + "metadata": metadata, + "name": name, + }, + eval_update_params.EvalUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalUpdateResponse, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[EvalListResponse]: + """ + List evaluations for a project. + + Args: + after: Identifier for the last eval from the previous pagination request. + + limit: Number of evals to retrieve. + + order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for + descending order. + + order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for + creation time or `updated_at` for last updated time. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/evals", + page=SyncCursorPage[EvalListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "order_by": order_by, + }, + eval_list_params.EvalListParams, + ), + ), + model=EvalListResponse, + ) + + def delete( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalDeleteResponse: + """ + Delete an evaluation. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._delete( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalDeleteResponse, + ) + + +class AsyncEvals(AsyncAPIResource): + @cached_property + def runs(self) -> AsyncRuns: + return AsyncRuns(self._client) + + @cached_property + def with_raw_response(self) -> AsyncEvalsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncEvalsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncEvalsWithStreamingResponse(self) + + async def create( + self, + *, + data_source_config: eval_create_params.DataSourceConfig, + testing_criteria: Iterable[eval_create_params.TestingCriterion], + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalCreateResponse: + """ + Create the structure of an evaluation that can be used to test a model's + performance. An evaluation is a set of testing criteria and a datasource. After + creating an evaluation, you can run it on different models and model parameters. + We support several types of graders and datasources. For more information, see + the [Evals guide](https://platform.openai.com/docs/guides/evals). + + Args: + data_source_config: The configuration for the data source used for the evaluation runs. + + testing_criteria: A list of graders for all eval runs in this group. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the evaluation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/evals", + body=await async_maybe_transform( + { + "data_source_config": data_source_config, + "testing_criteria": testing_criteria, + "metadata": metadata, + "name": name, + }, + eval_create_params.EvalCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalCreateResponse, + ) + + async def retrieve( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalRetrieveResponse: + """ + Get an evaluation by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._get( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalRetrieveResponse, + ) + + async def update( + self, + eval_id: str, + *, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalUpdateResponse: + """ + Update certain properties of an evaluation. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: Rename the evaluation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._post( + f"/evals/{eval_id}", + body=await async_maybe_transform( + { + "metadata": metadata, + "name": name, + }, + eval_update_params.EvalUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalUpdateResponse, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]: + """ + List evaluations for a project. + + Args: + after: Identifier for the last eval from the previous pagination request. + + limit: Number of evals to retrieve. + + order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for + descending order. + + order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for + creation time or `updated_at` for last updated time. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/evals", + page=AsyncCursorPage[EvalListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "order_by": order_by, + }, + eval_list_params.EvalListParams, + ), + ), + model=EvalListResponse, + ) + + async def delete( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvalDeleteResponse: + """ + Delete an evaluation. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._delete( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalDeleteResponse, + ) + + +class EvalsWithRawResponse: + def __init__(self, evals: Evals) -> None: + self._evals = evals + + self.create = _legacy_response.to_raw_response_wrapper( + evals.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + evals.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + evals.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + evals.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self._evals.runs) + + +class AsyncEvalsWithRawResponse: + def __init__(self, evals: AsyncEvals) -> None: + self._evals = evals + + self.create = _legacy_response.async_to_raw_response_wrapper( + evals.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + evals.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + evals.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + evals.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self._evals.runs) + + +class EvalsWithStreamingResponse: + def __init__(self, evals: Evals) -> None: + self._evals = evals + + self.create = to_streamed_response_wrapper( + evals.create, + ) + self.retrieve = to_streamed_response_wrapper( + evals.retrieve, + ) + self.update = to_streamed_response_wrapper( + evals.update, + ) + self.list = to_streamed_response_wrapper( + evals.list, + ) + self.delete = to_streamed_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self._evals.runs) + + +class AsyncEvalsWithStreamingResponse: + def __init__(self, evals: AsyncEvals) -> None: + self._evals = evals + + self.create = async_to_streamed_response_wrapper( + evals.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + evals.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + evals.update, + ) + self.list = async_to_streamed_response_wrapper( + evals.list, + ) + self.delete = async_to_streamed_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self._evals.runs) diff --git a/src/openai/resources/evals/runs/__init__.py b/src/openai/resources/evals/runs/__init__.py new file mode 100644 index 0000000000..d189f16fb7 --- /dev/null +++ b/src/openai/resources/evals/runs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
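The new `Evals` resource above exposes `create`, `retrieve`, `update`, `list`, and `delete`, with cursor pagination on `list`. The sketch below shows one plausible flow; the `data_source_config` and `testing_criteria` payload shapes are assumptions drawn from the public Evals guide rather than from this diff, which only defines the client-side methods.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Create an eval. The payload shapes are illustrative assumptions, not spec excerpts.
evaluation = client.evals.create(
    name="sentiment-check",
    data_source_config={
        "type": "custom",
        "item_schema": {
            "type": "object",
            "properties": {"ticket": {"type": "string"}, "label": {"type": "string"}},
            "required": ["ticket", "label"],
        },
        "include_sample_schema": True,
    },
    testing_criteria=[
        {
            "type": "string_check",
            "name": "exact label match",
            "input": "{{ sample.output_text }}",
            "reference": "{{ item.label }}",
            "operation": "eq",
        }
    ],
)

# list() returns a SyncCursorPage that can be iterated directly; order and order_by
# mirror the parameters documented in the resource above.
for e in client.evals.list(limit=10, order="desc", order_by="created_at"):
    print(e.id, e.name)

client.evals.update(evaluation.id, name="sentiment-check-v2")
client.evals.delete(evaluation.id)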
+ +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .output_items import ( + OutputItems, + AsyncOutputItems, + OutputItemsWithRawResponse, + AsyncOutputItemsWithRawResponse, + OutputItemsWithStreamingResponse, + AsyncOutputItemsWithStreamingResponse, +) + +__all__ = [ + "OutputItems", + "AsyncOutputItems", + "OutputItemsWithRawResponse", + "AsyncOutputItemsWithRawResponse", + "OutputItemsWithStreamingResponse", + "AsyncOutputItemsWithStreamingResponse", + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", +] diff --git a/src/openai/resources/evals/runs/output_items.py b/src/openai/resources/evals/runs/output_items.py new file mode 100644 index 0000000000..8fd0fdea92 --- /dev/null +++ b/src/openai/resources/evals/runs/output_items.py @@ -0,0 +1,315 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.evals.runs import output_item_list_params +from ....types.evals.runs.output_item_list_response import OutputItemListResponse +from ....types.evals.runs.output_item_retrieve_response import OutputItemRetrieveResponse + +__all__ = ["OutputItems", "AsyncOutputItems"] + + +class OutputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> OutputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return OutputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> OutputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return OutputItemsWithStreamingResponse(self) + + def retrieve( + self, + output_item_id: str, + *, + eval_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> OutputItemRetrieveResponse: + """ + Get an evaluation run output item by ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not output_item_id: + raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}") + return self._get( + f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=OutputItemRetrieveResponse, + ) + + def list( + self, + run_id: str, + *, + eval_id: str, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[OutputItemListResponse]: + """ + Get a list of output items for an evaluation run. + + Args: + after: Identifier for the last output item from the previous pagination request. + + limit: Number of output items to retrieve. + + order: Sort order for output items by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + status: Filter output items by status. Use `failed` to filter by failed output items or + `pass` to filter by passed output items. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs/{run_id}/output_items", + page=SyncCursorPage[OutputItemListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + output_item_list_params.OutputItemListParams, + ), + ), + model=OutputItemListResponse, + ) + + +class AsyncOutputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncOutputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncOutputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncOutputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncOutputItemsWithStreamingResponse(self) + + async def retrieve( + self, + output_item_id: str, + *, + eval_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> OutputItemRetrieveResponse: + """ + Get an evaluation run output item by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not output_item_id: + raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}") + return await self._get( + f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=OutputItemRetrieveResponse, + ) + + def list( + self, + run_id: str, + *, + eval_id: str, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[OutputItemListResponse, AsyncCursorPage[OutputItemListResponse]]: + """ + Get a list of output items for an evaluation run. + + Args: + after: Identifier for the last output item from the previous pagination request. + + limit: Number of output items to retrieve. + + order: Sort order for output items by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + status: Filter output items by status. Use `failed` to filter by failed output items or + `pass` to filter by passed output items. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs/{run_id}/output_items", + page=AsyncCursorPage[OutputItemListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + output_item_list_params.OutputItemListParams, + ), + ), + model=OutputItemListResponse, + ) + + +class OutputItemsWithRawResponse: + def __init__(self, output_items: OutputItems) -> None: + self._output_items = output_items + + self.retrieve = _legacy_response.to_raw_response_wrapper( + output_items.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + output_items.list, + ) + + +class AsyncOutputItemsWithRawResponse: + def __init__(self, output_items: AsyncOutputItems) -> None: + self._output_items = output_items + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + output_items.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + output_items.list, + ) + + +class OutputItemsWithStreamingResponse: + def __init__(self, output_items: OutputItems) -> None: + self._output_items = output_items + + self.retrieve = to_streamed_response_wrapper( + output_items.retrieve, + ) + self.list = to_streamed_response_wrapper( + output_items.list, + ) + + +class AsyncOutputItemsWithStreamingResponse: + def __init__(self, output_items: AsyncOutputItems) -> None: + self._output_items = output_items + + self.retrieve = async_to_streamed_response_wrapper( + output_items.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + output_items.list, + ) diff --git a/src/openai/resources/evals/runs/runs.py b/src/openai/resources/evals/runs/runs.py new file mode 100644 index 0000000000..d74c91e3c4 --- /dev/null +++ b/src/openai/resources/evals/runs/runs.py @@ -0,0 +1,632 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .output_items import ( + OutputItems, + AsyncOutputItems, + OutputItemsWithRawResponse, + AsyncOutputItemsWithRawResponse, + OutputItemsWithStreamingResponse, + AsyncOutputItemsWithStreamingResponse, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.evals import run_list_params, run_create_params +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared_params.metadata import Metadata +from ....types.evals.run_list_response import RunListResponse +from ....types.evals.run_cancel_response import RunCancelResponse +from ....types.evals.run_create_response import RunCreateResponse +from ....types.evals.run_delete_response import RunDeleteResponse +from ....types.evals.run_retrieve_response import RunRetrieveResponse + +__all__ = ["Runs", "AsyncRuns"] + + +class Runs(SyncAPIResource): + @cached_property + def output_items(self) -> OutputItems: + return OutputItems(self._client) + + @cached_property + def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RunsWithStreamingResponse(self) + + def create( + self, + eval_id: str, + *, + data_source: run_create_params.DataSource, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunCreateResponse: + """Create a new evaluation run. + + This is the endpoint that will kick off grading. + + Args: + data_source: Details about the run's data source. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._post( + f"/evals/{eval_id}/runs", + body=maybe_transform( + { + "data_source": data_source, + "metadata": metadata, + "name": name, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCreateResponse, + ) + + def retrieve( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunRetrieveResponse: + """ + Get an evaluation run by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunRetrieveResponse, + ) + + def list( + self, + eval_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[RunListResponse]: + """ + Get a list of runs for an evaluation. + + Args: + after: Identifier for the last run from the previous pagination request. + + limit: Number of runs to retrieve. + + order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for + descending order. Defaults to `asc`. + + status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` + | `canceled`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs", + page=SyncCursorPage[RunListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + run_list_params.RunListParams, + ), + ), + model=RunListResponse, + ) + + def delete( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunDeleteResponse: + """ + Delete an eval run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._delete( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunDeleteResponse, + ) + + def cancel( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunCancelResponse: + """ + Cancel an ongoing evaluation run. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._post( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCancelResponse, + ) + + +class AsyncRuns(AsyncAPIResource): + @cached_property + def output_items(self) -> AsyncOutputItems: + return AsyncOutputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRunsWithStreamingResponse(self) + + async def create( + self, + eval_id: str, + *, + data_source: run_create_params.DataSource, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunCreateResponse: + """Create a new evaluation run. + + This is the endpoint that will kick off grading. + + Args: + data_source: Details about the run's data source. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._post( + f"/evals/{eval_id}/runs", + body=await async_maybe_transform( + { + "data_source": data_source, + "metadata": metadata, + "name": name, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCreateResponse, + ) + + async def retrieve( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunRetrieveResponse: + """ + Get an evaluation run by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._get( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunRetrieveResponse, + ) + + def list( + self, + eval_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[RunListResponse, AsyncCursorPage[RunListResponse]]: + """ + Get a list of runs for an evaluation. + + Args: + after: Identifier for the last run from the previous pagination request. + + limit: Number of runs to retrieve. + + order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for + descending order. Defaults to `asc`. + + status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` + | `canceled`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs", + page=AsyncCursorPage[RunListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + run_list_params.RunListParams, + ), + ), + model=RunListResponse, + ) + + async def delete( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunDeleteResponse: + """ + Delete an eval run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._delete( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunDeleteResponse, + ) + + async def cancel( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RunCancelResponse: + """ + Cancel an ongoing evaluation run. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._post( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCancelResponse, + ) + + +class RunsWithRawResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = _legacy_response.to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + runs.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + runs.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + runs.delete, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> OutputItemsWithRawResponse: + return OutputItemsWithRawResponse(self._runs.output_items) + + +class AsyncRunsWithRawResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = _legacy_response.async_to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + runs.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + runs.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + runs.delete, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> AsyncOutputItemsWithRawResponse: + return AsyncOutputItemsWithRawResponse(self._runs.output_items) + + +class RunsWithStreamingResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = to_streamed_response_wrapper( + runs.retrieve, + ) + self.list = to_streamed_response_wrapper( + runs.list, + ) + self.delete = to_streamed_response_wrapper( + runs.delete, + ) + self.cancel = to_streamed_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> OutputItemsWithStreamingResponse: + return OutputItemsWithStreamingResponse(self._runs.output_items) + + +class AsyncRunsWithStreamingResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = async_to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + runs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + runs.list, + ) + self.delete = async_to_streamed_response_wrapper( + runs.delete, + ) + self.cancel = async_to_streamed_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> AsyncOutputItemsWithStreamingResponse: + return AsyncOutputItemsWithStreamingResponse(self._runs.output_items) diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py index a6f75e5a4c..179af870ba 100644 --- a/src/openai/resources/files.py +++ b/src/openai/resources/files.py @@ -1,44 +1,62 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
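As a rough usage sketch for the evals run and output-item resources added above, assuming a configured client that exposes them as `client.evals.runs` and `client.evals.runs.output_items` (the IDs and printed fields below are illustrative placeholders, not values taken from this diff):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

eval_id = "eval_123"    # hypothetical eval ID
run_id = "evalrun_456"  # hypothetical run ID

# Fetch a single run, then page through all runs for the eval, newest first.
run = client.evals.runs.retrieve(run_id, eval_id=eval_id)
for r in client.evals.runs.list(eval_id, order="desc", limit=20):
    print(r.id, r.status)

# Page through the output items produced by a run, keeping only passed items.
for item in client.evals.runs.output_items.list(run_id, eval_id=eval_id, status="pass"):
    print(item.id)

# Cancel a run that is still in progress.
client.evals.runs.cancel(run_id, eval_id=eval_id)

The `with_raw_response` and `with_streaming_response` variants wrap these same methods, so a call such as `client.evals.runs.with_raw_response.retrieve(run_id, eval_id=eval_id)` would return the raw HTTP response rather than the parsed `RunRetrieveResponse`.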
from __future__ import annotations import time import typing_extensions -from typing import TYPE_CHECKING, Mapping, cast +from typing import Mapping, cast from typing_extensions import Literal import httpx -from ..types import FileObject, FileDeleted, file_list_params, file_create_params +from .. import _legacy_response +from ..types import FilePurpose, file_list_params, file_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import extract_files, maybe_transform, deepcopy_minimal +from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ..pagination import SyncPage, AsyncPage -from .._base_client import ( - AsyncPaginator, - HttpxBinaryResponseContent, - make_request_options, +from .._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, ) - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from ..pagination import SyncCursorPage, AsyncCursorPage +from .._base_client import AsyncPaginator, make_request_options +from ..types.file_object import FileObject +from ..types.file_deleted import FileDeleted +from ..types.file_purpose import FilePurpose __all__ = ["Files", "AsyncFiles"] class Files(SyncAPIResource): - with_raw_response: FilesWithRawResponse + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FilesWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = FilesWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FilesWithStreamingResponse(self) def create( self, *, file: FileTypes, - purpose: Literal["fine-tune", "assistants"], + purpose: FilePurpose, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -46,15 +64,26 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileObject: - """Upload a file that can be used across various endpoints/features. + """Upload a file that can be used across various endpoints. + + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. - The size of - all the files uploaded by one organization can be up to 100 GB. + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. - The size of individual files for can be a maximum of 512MB. 
See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to - learn more about the types of files supported. The Fine-tuning API only supports - `.jsonl` files. + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. + + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -62,14 +91,10 @@ def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. - - Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets extra_headers: Send extra headers @@ -86,12 +111,10 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/files", body=maybe_transform(body, file_create_params.FileCreateParams), @@ -125,6 +148,8 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return self._get( f"/files/{file_id}", options=make_request_options( @@ -136,6 +161,9 @@ def retrieve( def list( self, *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, purpose: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -143,11 +171,23 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncPage[FileObject]: - """ - Returns a list of files that belong to the user's organization. + ) -> SyncCursorPage[FileObject]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. 
For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. extra_headers: Send extra headers @@ -160,13 +200,21 @@ def list( """ return self._get_api_list( "/files", - page=SyncPage[FileObject], + page=SyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -194,6 +242,8 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return self._delete( f"/files/{file_id}", options=make_request_options( @@ -212,7 +262,7 @@ def content( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HttpxBinaryResponseContent: + ) -> _legacy_response.HttpxBinaryResponseContent: """ Returns the contents of the specified file. @@ -225,12 +275,15 @@ def content( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} return self._get( f"/files/{file_id}/content", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=HttpxBinaryResponseContent, + cast_to=_legacy_response.HttpxBinaryResponseContent, ) @typing_extensions.deprecated("The `.content()` method should be used instead") @@ -257,7 +310,8 @@ def retrieve_content( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"Accept": "application/json", **(extra_headers or {})} + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return self._get( f"/files/{file_id}/content", options=make_request_options( @@ -291,17 +345,30 @@ def wait_for_processing( class AsyncFiles(AsyncAPIResource): - with_raw_response: AsyncFilesWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncFilesWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFilesWithStreamingResponse(self) async def create( self, *, file: FileTypes, - purpose: Literal["fine-tune", "assistants"], + purpose: FilePurpose, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -309,15 +376,26 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileObject: - """Upload a file that can be used across various endpoints/features. + """Upload a file that can be used across various endpoints. + + Individual files can be + up to 512 MB, and the size of all files uploaded by one organization can be up + to 100 GB. + + The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for + details. - The size of - all the files uploaded by one organization can be up to 100 GB. + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. - The size of individual files for can be a maximum of 512MB. See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to - learn more about the types of files supported. The Fine-tuning API only supports - `.jsonl` files. + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -325,14 +403,10 @@ async def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. - - Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets extra_headers: Send extra headers @@ -349,15 +423,13 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. 
- # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/files", - body=maybe_transform(body, file_create_params.FileCreateParams), + body=await async_maybe_transform(body, file_create_params.FileCreateParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -388,6 +460,8 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return await self._get( f"/files/{file_id}", options=make_request_options( @@ -399,6 +473,9 @@ async def retrieve( def list( self, *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, purpose: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -406,11 +483,23 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[FileObject, AsyncPage[FileObject]]: - """ - Returns a list of files that belong to the user's organization. + ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. 
extra_headers: Send extra headers @@ -423,13 +512,21 @@ def list( """ return self._get_api_list( "/files", - page=AsyncPage[FileObject], + page=AsyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -457,6 +554,8 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return await self._delete( f"/files/{file_id}", options=make_request_options( @@ -475,7 +574,7 @@ async def content( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HttpxBinaryResponseContent: + ) -> _legacy_response.HttpxBinaryResponseContent: """ Returns the contents of the specified file. @@ -488,12 +587,15 @@ async def content( timeout: Override the client-level default timeout for this request, in seconds """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} return await self._get( f"/files/{file_id}/content", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=HttpxBinaryResponseContent, + cast_to=_legacy_response.HttpxBinaryResponseContent, ) @typing_extensions.deprecated("The `.content()` method should be used instead") @@ -520,7 +622,8 @@ async def retrieve_content( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"Accept": "application/json", **(extra_headers or {})} + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") return await self._get( f"/files/{file_id}/content", options=make_request_options( @@ -555,43 +658,105 @@ async def wait_for_processing( class FilesWithRawResponse: def __init__(self, files: Files) -> None: - self.create = to_raw_response_wrapper( + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( files.create, ) - self.retrieve = to_raw_response_wrapper( + self.retrieve = _legacy_response.to_raw_response_wrapper( files.retrieve, ) - self.list = to_raw_response_wrapper( + self.list = _legacy_response.to_raw_response_wrapper( files.list, ) - self.delete = to_raw_response_wrapper( + self.delete = _legacy_response.to_raw_response_wrapper( files.delete, ) - self.content = to_raw_response_wrapper( + self.content = _legacy_response.to_raw_response_wrapper( files.content, ) - self.retrieve_content = to_raw_response_wrapper( # pyright: ignore[reportDeprecated] - files.retrieve_content # pyright: ignore[reportDeprecated], + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) ) class AsyncFilesWithRawResponse: def __init__(self, files: AsyncFiles) -> None: - self.create = async_to_raw_response_wrapper( + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = 
_legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + self.content = to_custom_streamed_response_wrapper( + files.content, + StreamedBinaryAPIResponse, + ) + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( files.create, ) - self.retrieve = async_to_raw_response_wrapper( + self.retrieve = async_to_streamed_response_wrapper( files.retrieve, ) - self.list = async_to_raw_response_wrapper( + self.list = async_to_streamed_response_wrapper( files.list, ) - self.delete = async_to_raw_response_wrapper( + self.delete = async_to_streamed_response_wrapper( files.delete, ) - self.content = async_to_raw_response_wrapper( + self.content = async_to_custom_streamed_response_wrapper( files.content, + AsyncStreamedBinaryAPIResponse, ) - self.retrieve_content = async_to_raw_response_wrapper( # pyright: ignore[reportDeprecated] - files.retrieve_content # pyright: ignore[reportDeprecated], + self.retrieve_content = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + files.retrieve_content # pyright: ignore[reportDeprecated], + ) ) diff --git a/src/openai/resources/fine_tunes.py b/src/openai/resources/fine_tunes.py deleted file mode 100644 index 91c8201cbb..0000000000 --- a/src/openai/resources/fine_tunes.py +++ /dev/null @@ -1,822 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
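As a rough usage sketch for the reworked files resource, assuming a configured client and an example JSONL file on disk (the path, purpose, and output filenames are placeholders; the streaming helpers follow the binary response wrappers referenced above):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Upload a JSONL file for fine-tuning; the SDK sets the multipart Content-Type
# header (including the boundary parameter) automatically.
uploaded = client.files.create(file=open("training.jsonl", "rb"), purpose="fine-tune")

# Page through files using the new cursor-based pagination parameters.
for f in client.files.list(purpose="fine-tune", order="desc", limit=100):
    print(f.id, f.filename)

# Download the raw bytes; .content() returns a binary response object.
client.files.content(uploaded.id).write_to_file("training-copy.jsonl")

# Or stream the body to disk without reading it eagerly.
with client.files.with_streaming_response.content(uploaded.id) as response:
    response.stream_to_file("training-streamed.jsonl")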
- -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Union, Optional, overload -from typing_extensions import Literal - -import httpx - -from ..types import ( - FineTune, - FineTuneEvent, - FineTuneEventsListResponse, - fine_tune_create_params, - fine_tune_list_events_params, -) -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from .._streaming import Stream, AsyncStream -from ..pagination import SyncPage, AsyncPage -from .._base_client import AsyncPaginator, make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI - -__all__ = ["FineTunes", "AsyncFineTunes"] - - -class FineTunes(SyncAPIResource): - with_raw_response: FineTunesWithRawResponse - - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = FineTunesWithRawResponse(self) - - def create( - self, - *, - training_file: str, - batch_size: Optional[int] | NotGiven = NOT_GIVEN, - classification_betas: Optional[List[float]] | NotGiven = NOT_GIVEN, - classification_n_classes: Optional[int] | NotGiven = NOT_GIVEN, - classification_positive_class: Optional[str] | NotGiven = NOT_GIVEN, - compute_classification_metrics: Optional[bool] | NotGiven = NOT_GIVEN, - hyperparameters: fine_tune_create_params.Hyperparameters | NotGiven = NOT_GIVEN, - learning_rate_multiplier: Optional[float] | NotGiven = NOT_GIVEN, - model: Union[str, Literal["ada", "babbage", "curie", "davinci"], None] | NotGiven = NOT_GIVEN, - prompt_loss_weight: Optional[float] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - validation_file: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Creates a job that fine-tunes a specified model from a given dataset. - - Response includes details of the enqueued job including job status and the name - of the fine-tuned models once complete. - - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/legacy-fine-tuning) - - Args: - training_file: The ID of an uploaded file that contains training data. - - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) - for how to upload a file. - - Your dataset must be formatted as a JSONL file, where each training example is a - JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - - batch_size: The batch size to use for training. The batch size is the number of training - examples used to train a single forward and backward pass. - - By default, the batch size will be dynamically configured to be ~0.2% of the - number of examples in the training set, capped at 256 - in general, we've found - that larger batch sizes tend to work better for larger datasets. 
- - classification_betas: If this is provided, we calculate F-beta scores at the specified beta values. - The F-beta score is a generalization of F-1 score. This is only used for binary - classification. - - With a beta of 1 (i.e. the F-1 score), precision and recall are given the same - weight. A larger beta score puts more weight on recall and less on precision. A - smaller beta score puts more weight on precision and less on recall. - - classification_n_classes: The number of classes in a classification task. - - This parameter is required for multiclass classification. - - classification_positive_class: The positive class in binary classification. - - This parameter is needed to generate precision, recall, and F1 metrics when - doing binary classification. - - compute_classification_metrics: If set, we calculate classification-specific metrics such as accuracy and F-1 - score using the validation set at the end of every epoch. These metrics can be - viewed in the - [results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). - - In order to compute classification metrics, you must provide a - `validation_file`. Additionally, you must specify `classification_n_classes` for - multiclass classification or `classification_positive_class` for binary - classification. - - hyperparameters: The hyperparameters used for the fine-tuning job. - - learning_rate_multiplier: The learning rate multiplier to use for training. The fine-tuning learning rate - is the original learning rate used for pretraining multiplied by this value. - - By default, the learning rate multiplier is the 0.05, 0.1, or 0.2 depending on - final `batch_size` (larger learning rates tend to perform better with larger - batch sizes). We recommend experimenting with values in the range 0.02 to 0.2 to - see what produces the best results. - - model: The name of the base model to fine-tune. You can select one of "ada", "babbage", - "curie", "davinci", or a fine-tuned model created after 2022-04-21 and before - 2023-08-22. To learn more about these models, see the - [Models](https://platform.openai.com/docs/models) documentation. - - prompt_loss_weight: The weight to use for loss on the prompt tokens. This controls how much the - model tries to learn to generate the prompt (as compared to the completion which - always has a weight of 1.0), and can add a stabilizing effect to training when - completions are short. - - If prompts are extremely long (relative to completions), it may make sense to - reduce this weight so as to avoid over-prioritizing learning the prompt. - - suffix: A string of up to 40 characters that will be added to your fine-tuned model - name. - - For example, a `suffix` of "custom-model-name" would produce a model name like - `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. - - validation_file: The ID of an uploaded file that contains validation data. - - If you provide this file, the data is used to generate validation metrics - periodically during fine-tuning. These metrics can be viewed in the - [fine-tuning results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). - Your train and validation data should be mutually exclusive. - - Your dataset must be formatted as a JSONL file, where each validation example is - a JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. 
- - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/fine-tunes", - body=maybe_transform( - { - "training_file": training_file, - "batch_size": batch_size, - "classification_betas": classification_betas, - "classification_n_classes": classification_n_classes, - "classification_positive_class": classification_positive_class, - "compute_classification_metrics": compute_classification_metrics, - "hyperparameters": hyperparameters, - "learning_rate_multiplier": learning_rate_multiplier, - "model": model, - "prompt_loss_weight": prompt_loss_weight, - "suffix": suffix, - "validation_file": validation_file, - }, - fine_tune_create_params.FineTuneCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - def retrieve( - self, - fine_tune_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Gets info about the fine-tune job. - - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/legacy-fine-tuning) - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - f"/fine-tunes/{fine_tune_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncPage[FineTune]: - """List your organization's fine-tuning jobs""" - return self._get_api_list( - "/fine-tunes", - page=SyncPage[FineTune], - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - model=FineTune, - ) - - def cancel( - self, - fine_tune_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Immediately cancel a fine-tune job. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - f"/fine-tunes/{fine_tune_id}/cancel", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - @overload - def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[True], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> Stream[FineTuneEvent]: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def list_events( - self, - fine_tune_id: str, - *, - stream: bool, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse | Stream[FineTuneEvent]: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse | Stream[FineTuneEvent]: - return self._get( - f"/fine-tunes/{fine_tune_id}/events", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"stream": stream}, fine_tune_list_events_params.FineTuneListEventsParams), - ), - cast_to=FineTuneEventsListResponse, - stream=stream or False, - stream_cls=Stream[FineTuneEvent], - ) - - -class AsyncFineTunes(AsyncAPIResource): - with_raw_response: AsyncFineTunesWithRawResponse - - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncFineTunesWithRawResponse(self) - - async def create( - self, - *, - training_file: str, - batch_size: Optional[int] | NotGiven = NOT_GIVEN, - classification_betas: Optional[List[float]] | NotGiven = NOT_GIVEN, - classification_n_classes: Optional[int] | NotGiven = NOT_GIVEN, - classification_positive_class: Optional[str] | NotGiven = NOT_GIVEN, - compute_classification_metrics: Optional[bool] | NotGiven = NOT_GIVEN, - hyperparameters: fine_tune_create_params.Hyperparameters | NotGiven = NOT_GIVEN, - learning_rate_multiplier: Optional[float] | NotGiven = NOT_GIVEN, - model: Union[str, Literal["ada", "babbage", "curie", "davinci"], None] | NotGiven = NOT_GIVEN, - prompt_loss_weight: Optional[float] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - validation_file: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Creates a job that fine-tunes a specified model from a given dataset. 
- - Response includes details of the enqueued job including job status and the name - of the fine-tuned models once complete. - - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/legacy-fine-tuning) - - Args: - training_file: The ID of an uploaded file that contains training data. - - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) - for how to upload a file. - - Your dataset must be formatted as a JSONL file, where each training example is a - JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - - batch_size: The batch size to use for training. The batch size is the number of training - examples used to train a single forward and backward pass. - - By default, the batch size will be dynamically configured to be ~0.2% of the - number of examples in the training set, capped at 256 - in general, we've found - that larger batch sizes tend to work better for larger datasets. - - classification_betas: If this is provided, we calculate F-beta scores at the specified beta values. - The F-beta score is a generalization of F-1 score. This is only used for binary - classification. - - With a beta of 1 (i.e. the F-1 score), precision and recall are given the same - weight. A larger beta score puts more weight on recall and less on precision. A - smaller beta score puts more weight on precision and less on recall. - - classification_n_classes: The number of classes in a classification task. - - This parameter is required for multiclass classification. - - classification_positive_class: The positive class in binary classification. - - This parameter is needed to generate precision, recall, and F1 metrics when - doing binary classification. - - compute_classification_metrics: If set, we calculate classification-specific metrics such as accuracy and F-1 - score using the validation set at the end of every epoch. These metrics can be - viewed in the - [results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). - - In order to compute classification metrics, you must provide a - `validation_file`. Additionally, you must specify `classification_n_classes` for - multiclass classification or `classification_positive_class` for binary - classification. - - hyperparameters: The hyperparameters used for the fine-tuning job. - - learning_rate_multiplier: The learning rate multiplier to use for training. The fine-tuning learning rate - is the original learning rate used for pretraining multiplied by this value. - - By default, the learning rate multiplier is the 0.05, 0.1, or 0.2 depending on - final `batch_size` (larger learning rates tend to perform better with larger - batch sizes). We recommend experimenting with values in the range 0.02 to 0.2 to - see what produces the best results. - - model: The name of the base model to fine-tune. You can select one of "ada", "babbage", - "curie", "davinci", or a fine-tuned model created after 2022-04-21 and before - 2023-08-22. To learn more about these models, see the - [Models](https://platform.openai.com/docs/models) documentation. - - prompt_loss_weight: The weight to use for loss on the prompt tokens. 
This controls how much the - model tries to learn to generate the prompt (as compared to the completion which - always has a weight of 1.0), and can add a stabilizing effect to training when - completions are short. - - If prompts are extremely long (relative to completions), it may make sense to - reduce this weight so as to avoid over-prioritizing learning the prompt. - - suffix: A string of up to 40 characters that will be added to your fine-tuned model - name. - - For example, a `suffix` of "custom-model-name" would produce a model name like - `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. - - validation_file: The ID of an uploaded file that contains validation data. - - If you provide this file, the data is used to generate validation metrics - periodically during fine-tuning. These metrics can be viewed in the - [fine-tuning results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). - Your train and validation data should be mutually exclusive. - - Your dataset must be formatted as a JSONL file, where each validation example is - a JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/fine-tunes", - body=maybe_transform( - { - "training_file": training_file, - "batch_size": batch_size, - "classification_betas": classification_betas, - "classification_n_classes": classification_n_classes, - "classification_positive_class": classification_positive_class, - "compute_classification_metrics": compute_classification_metrics, - "hyperparameters": hyperparameters, - "learning_rate_multiplier": learning_rate_multiplier, - "model": model, - "prompt_loss_weight": prompt_loss_weight, - "suffix": suffix, - "validation_file": validation_file, - }, - fine_tune_create_params.FineTuneCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - async def retrieve( - self, - fine_tune_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Gets info about the fine-tune job. 
- - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/legacy-fine-tuning) - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - f"/fine-tunes/{fine_tune_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[FineTune, AsyncPage[FineTune]]: - """List your organization's fine-tuning jobs""" - return self._get_api_list( - "/fine-tunes", - page=AsyncPage[FineTune], - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - model=FineTune, - ) - - async def cancel( - self, - fine_tune_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> FineTune: - """ - Immediately cancel a fine-tune job. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - f"/fine-tunes/{fine_tune_id}/cancel", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=FineTune, - ) - - @overload - async def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[True], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> AsyncStream[FineTuneEvent]: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def list_events( - self, - fine_tune_id: str, - *, - stream: bool, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse | AsyncStream[FineTuneEvent]: - """ - Get fine-grained status updates for a fine-tune job. - - Args: - stream: Whether to stream events for the fine-tune job. If set to true, events will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - async def list_events( - self, - fine_tune_id: str, - *, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = 86400, - ) -> FineTuneEventsListResponse | AsyncStream[FineTuneEvent]: - return await self._get( - f"/fine-tunes/{fine_tune_id}/events", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"stream": stream}, fine_tune_list_events_params.FineTuneListEventsParams), - ), - cast_to=FineTuneEventsListResponse, - stream=stream or False, - stream_cls=AsyncStream[FineTuneEvent], - ) - - -class FineTunesWithRawResponse: - def __init__(self, fine_tunes: FineTunes) -> None: - self.create = to_raw_response_wrapper( - fine_tunes.create, - ) - self.retrieve = to_raw_response_wrapper( - fine_tunes.retrieve, - ) - self.list = to_raw_response_wrapper( - fine_tunes.list, - ) - self.cancel = to_raw_response_wrapper( - fine_tunes.cancel, - ) - self.list_events = to_raw_response_wrapper( - fine_tunes.list_events, - ) - - -class AsyncFineTunesWithRawResponse: - def __init__(self, fine_tunes: AsyncFineTunes) -> None: - self.create = async_to_raw_response_wrapper( - fine_tunes.create, - ) - self.retrieve = async_to_raw_response_wrapper( - fine_tunes.retrieve, - ) - self.list = async_to_raw_response_wrapper( - fine_tunes.list, - ) - self.cancel = async_to_raw_response_wrapper( - fine_tunes.cancel, - ) - self.list_events = async_to_raw_response_wrapper( - fine_tunes.list_events, - ) diff --git a/src/openai/resources/fine_tuning/__init__.py b/src/openai/resources/fine_tuning/__init__.py index 9133c25d4a..c76af83deb 100644 --- a/src/openai/resources/fine_tuning/__init__.py +++ b/src/openai/resources/fine_tuning/__init__.py @@ -1,11 +1,36 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
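The hunk above deletes the legacy `/fine-tunes` resource entirely; its functionality lives on under the `fine_tuning.jobs` resource that the following hunks re-export and extend. A minimal sketch of the replacement call path, assuming a configured client, an already-uploaded JSONL training file, and placeholder file/model identifiers (not taken from this patch):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Roughly equivalent to the removed client.fine_tunes.create(...) call,
# but targeting the newer /fine_tuning/jobs endpoint instead of /fine-tunes.
job = client.fine_tuning.jobs.create(
    training_file="file-abc123",  # placeholder ID of an uploaded JSONL file
    model="gpt-3.5-turbo",        # placeholder base model name
)

# Unlike the removed list_events(stream=...) overloads, job events are
# returned as a cursor page that can simply be iterated.
for event in client.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id):
    print(event.message)
```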
-from .jobs import Jobs, AsyncJobs, JobsWithRawResponse, AsyncJobsWithRawResponse +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) from .fine_tuning import ( FineTuning, AsyncFineTuning, FineTuningWithRawResponse, AsyncFineTuningWithRawResponse, + FineTuningWithStreamingResponse, + AsyncFineTuningWithStreamingResponse, ) __all__ = [ @@ -13,8 +38,24 @@ "AsyncJobs", "JobsWithRawResponse", "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Alpha", + "AsyncAlpha", + "AlphaWithRawResponse", + "AsyncAlphaWithRawResponse", + "AlphaWithStreamingResponse", + "AsyncAlphaWithStreamingResponse", "FineTuning", "AsyncFineTuning", "FineTuningWithRawResponse", "AsyncFineTuningWithRawResponse", + "FineTuningWithStreamingResponse", + "AsyncFineTuningWithStreamingResponse", ] diff --git a/src/openai/resources/fine_tuning/alpha/__init__.py b/src/openai/resources/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..8bed8af4fd --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .graders import ( + Graders, + AsyncGraders, + GradersWithRawResponse, + AsyncGradersWithRawResponse, + GradersWithStreamingResponse, + AsyncGradersWithStreamingResponse, +) + +__all__ = [ + "Graders", + "AsyncGraders", + "GradersWithRawResponse", + "AsyncGradersWithRawResponse", + "GradersWithStreamingResponse", + "AsyncGradersWithStreamingResponse", + "Alpha", + "AsyncAlpha", + "AlphaWithRawResponse", + "AsyncAlphaWithRawResponse", + "AlphaWithStreamingResponse", + "AsyncAlphaWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/alpha/alpha.py b/src/openai/resources/fine_tuning/alpha/alpha.py new file mode 100644 index 0000000000..54c05fab69 --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/alpha.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .graders import ( + Graders, + AsyncGraders, + GradersWithRawResponse, + AsyncGradersWithRawResponse, + GradersWithStreamingResponse, + AsyncGradersWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Alpha", "AsyncAlpha"] + + +class Alpha(SyncAPIResource): + @cached_property + def graders(self) -> Graders: + return Graders(self._client) + + @cached_property + def with_raw_response(self) -> AlphaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AlphaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AlphaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AlphaWithStreamingResponse(self) + + +class AsyncAlpha(AsyncAPIResource): + @cached_property + def graders(self) -> AsyncGraders: + return AsyncGraders(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAlphaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAlphaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAlphaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAlphaWithStreamingResponse(self) + + +class AlphaWithRawResponse: + def __init__(self, alpha: Alpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> GradersWithRawResponse: + return GradersWithRawResponse(self._alpha.graders) + + +class AsyncAlphaWithRawResponse: + def __init__(self, alpha: AsyncAlpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> AsyncGradersWithRawResponse: + return AsyncGradersWithRawResponse(self._alpha.graders) + + +class AlphaWithStreamingResponse: + def __init__(self, alpha: Alpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> GradersWithStreamingResponse: + return GradersWithStreamingResponse(self._alpha.graders) + + +class AsyncAlphaWithStreamingResponse: + def __init__(self, alpha: AsyncAlpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> AsyncGradersWithStreamingResponse: + return AsyncGradersWithStreamingResponse(self._alpha.graders) diff --git a/src/openai/resources/fine_tuning/alpha/graders.py b/src/openai/resources/fine_tuning/alpha/graders.py new file mode 100644 index 0000000000..f27acdfd9c --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/graders.py @@ -0,0 +1,272 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.fine_tuning.alpha import grader_run_params, grader_validate_params +from ....types.fine_tuning.alpha.grader_run_response import GraderRunResponse +from ....types.fine_tuning.alpha.grader_validate_response import GraderValidateResponse + +__all__ = ["Graders", "AsyncGraders"] + + +class Graders(SyncAPIResource): + @cached_property + def with_raw_response(self) -> GradersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return GradersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> GradersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return GradersWithStreamingResponse(self) + + def run( + self, + *, + grader: grader_run_params.Grader, + model_sample: str, + reference_answer: Union[str, Iterable[object], float, object], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GraderRunResponse: + """ + Run a grader. + + Args: + grader: The grader used for the fine-tuning job. + + model_sample: The model sample to be evaluated. + + reference_answer: The reference answer for the evaluation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/alpha/graders/run", + body=maybe_transform( + { + "grader": grader, + "model_sample": model_sample, + "reference_answer": reference_answer, + }, + grader_run_params.GraderRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderRunResponse, + ) + + def validate( + self, + *, + grader: grader_validate_params.Grader, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GraderValidateResponse: + """ + Validate a grader. + + Args: + grader: The grader used for the fine-tuning job. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/alpha/graders/validate", + body=maybe_transform({"grader": grader}, grader_validate_params.GraderValidateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderValidateResponse, + ) + + +class AsyncGraders(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncGradersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncGradersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncGradersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncGradersWithStreamingResponse(self) + + async def run( + self, + *, + grader: grader_run_params.Grader, + model_sample: str, + reference_answer: Union[str, Iterable[object], float, object], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GraderRunResponse: + """ + Run a grader. + + Args: + grader: The grader used for the fine-tuning job. + + model_sample: The model sample to be evaluated. + + reference_answer: The reference answer for the evaluation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/alpha/graders/run", + body=await async_maybe_transform( + { + "grader": grader, + "model_sample": model_sample, + "reference_answer": reference_answer, + }, + grader_run_params.GraderRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderRunResponse, + ) + + async def validate( + self, + *, + grader: grader_validate_params.Grader, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GraderValidateResponse: + """ + Validate a grader. + + Args: + grader: The grader used for the fine-tuning job. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/alpha/graders/validate", + body=await async_maybe_transform({"grader": grader}, grader_validate_params.GraderValidateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderValidateResponse, + ) + + +class GradersWithRawResponse: + def __init__(self, graders: Graders) -> None: + self._graders = graders + + self.run = _legacy_response.to_raw_response_wrapper( + graders.run, + ) + self.validate = _legacy_response.to_raw_response_wrapper( + graders.validate, + ) + + +class AsyncGradersWithRawResponse: + def __init__(self, graders: AsyncGraders) -> None: + self._graders = graders + + self.run = _legacy_response.async_to_raw_response_wrapper( + graders.run, + ) + self.validate = _legacy_response.async_to_raw_response_wrapper( + graders.validate, + ) + + +class GradersWithStreamingResponse: + def __init__(self, graders: Graders) -> None: + self._graders = graders + + self.run = to_streamed_response_wrapper( + graders.run, + ) + self.validate = to_streamed_response_wrapper( + graders.validate, + ) + + +class AsyncGradersWithStreamingResponse: + def __init__(self, graders: AsyncGraders) -> None: + self._graders = graders + + self.run = async_to_streamed_response_wrapper( + graders.run, + ) + self.validate = async_to_streamed_response_wrapper( + graders.validate, + ) diff --git a/src/openai/resources/fine_tuning/checkpoints/__init__.py b/src/openai/resources/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..fdc37940f9 --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) +from .permissions import ( + Permissions, + AsyncPermissions, + PermissionsWithRawResponse, + AsyncPermissionsWithRawResponse, + PermissionsWithStreamingResponse, + AsyncPermissionsWithStreamingResponse, +) + +__all__ = [ + "Permissions", + "AsyncPermissions", + "PermissionsWithRawResponse", + "AsyncPermissionsWithRawResponse", + "PermissionsWithStreamingResponse", + "AsyncPermissionsWithStreamingResponse", + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/checkpoints/checkpoints.py b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py new file mode 100644 index 0000000000..f59976a264 --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
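The new `graders.py` resource above exposes `run` and `validate` under `client.fine_tuning.alpha.graders`, matching the `/fine_tuning/alpha/graders/*` endpoints. A minimal usage sketch; the grader payload shown is an illustrative `string_check` grader and its template strings are assumptions for demonstration, not something defined in this patch:

```python
from openai import OpenAI

client = OpenAI()

# Illustrative grader definition; the exact schema is set by the API,
# and this particular payload is assumed for demonstration purposes.
grader = {
    "type": "string_check",
    "name": "example_grader",
    "input": "{{sample.output_text}}",
    "reference": "{{item.reference_answer}}",
    "operation": "eq",
}

# Validate the grader definition before running it.
client.fine_tuning.alpha.graders.validate(grader=grader)

# Run the grader against a single model sample, as in the `run` method above.
result = client.fine_tuning.alpha.graders.run(
    grader=grader,
    model_sample="The capital of France is Paris.",
    reference_answer="Paris",
)
print(result.reward)
```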
+ +from __future__ import annotations + +from ...._compat import cached_property +from .permissions import ( + Permissions, + AsyncPermissions, + PermissionsWithRawResponse, + AsyncPermissionsWithRawResponse, + PermissionsWithStreamingResponse, + AsyncPermissionsWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def permissions(self) -> Permissions: + return Permissions(self._client) + + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CheckpointsWithStreamingResponse(self) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def permissions(self) -> AsyncPermissions: + return AsyncPermissions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCheckpointsWithStreamingResponse(self) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> PermissionsWithRawResponse: + return PermissionsWithRawResponse(self._checkpoints.permissions) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> AsyncPermissionsWithRawResponse: + return AsyncPermissionsWithRawResponse(self._checkpoints.permissions) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> PermissionsWithStreamingResponse: + return PermissionsWithStreamingResponse(self._checkpoints.permissions) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> AsyncPermissionsWithStreamingResponse: + return AsyncPermissionsWithStreamingResponse(self._checkpoints.permissions) diff --git a/src/openai/resources/fine_tuning/checkpoints/permissions.py b/src/openai/resources/fine_tuning/checkpoints/permissions.py new file mode 100644 index 0000000000..547e42ecac --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/permissions.py @@ -0,0 +1,419 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncPage, AsyncPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.fine_tuning.checkpoints import permission_create_params, permission_retrieve_params +from ....types.fine_tuning.checkpoints.permission_create_response import PermissionCreateResponse +from ....types.fine_tuning.checkpoints.permission_delete_response import PermissionDeleteResponse +from ....types.fine_tuning.checkpoints.permission_retrieve_response import PermissionRetrieveResponse + +__all__ = ["Permissions", "AsyncPermissions"] + + +class Permissions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PermissionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return PermissionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PermissionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return PermissionsWithStreamingResponse(self) + + def create( + self, + fine_tuned_model_checkpoint: str, + *, + project_ids: List[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[PermissionCreateResponse]: + """ + **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys). + + This enables organization owners to share fine-tuned models with other projects + in their organization. + + Args: + project_ids: The project identifiers to grant access to. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get_api_list( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + page=SyncPage[PermissionCreateResponse], + body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=PermissionCreateResponse, + method="post", + ) + + def retrieve( + self, + fine_tuned_model_checkpoint: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN, + project_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PermissionRetrieveResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to view all permissions for a + fine-tuned model checkpoint. + + Args: + after: Identifier for the last permission ID from the previous pagination request. + + limit: Number of permissions to retrieve. + + order: The order in which to retrieve permissions. + + project_id: The ID of the project to get permissions for. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "project_id": project_id, + }, + permission_retrieve_params.PermissionRetrieveParams, + ), + ), + cast_to=PermissionRetrieveResponse, + ) + + def delete( + self, + permission_id: str, + *, + fine_tuned_model_checkpoint: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PermissionDeleteResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to delete a permission for a + fine-tuned model checkpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + if not permission_id: + raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}") + return self._delete( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PermissionDeleteResponse, + ) + + +class AsyncPermissions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPermissionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncPermissionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPermissionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncPermissionsWithStreamingResponse(self) + + def create( + self, + fine_tuned_model_checkpoint: str, + *, + project_ids: List[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[PermissionCreateResponse, AsyncPage[PermissionCreateResponse]]: + """ + **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys). + + This enables organization owners to share fine-tuned models with other projects + in their organization. + + Args: + project_ids: The project identifiers to grant access to. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get_api_list( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + page=AsyncPage[PermissionCreateResponse], + body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=PermissionCreateResponse, + method="post", + ) + + async def retrieve( + self, + fine_tuned_model_checkpoint: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN, + project_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PermissionRetrieveResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to view all permissions for a + fine-tuned model checkpoint. + + Args: + after: Identifier for the last permission ID from the previous pagination request. + + limit: Number of permissions to retrieve. + + order: The order in which to retrieve permissions. + + project_id: The ID of the project to get permissions for. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return await self._get( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "project_id": project_id, + }, + permission_retrieve_params.PermissionRetrieveParams, + ), + ), + cast_to=PermissionRetrieveResponse, + ) + + async def delete( + self, + permission_id: str, + *, + fine_tuned_model_checkpoint: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PermissionDeleteResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to delete a permission for a + fine-tuned model checkpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + if not permission_id: + raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}") + return await self._delete( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PermissionDeleteResponse, + ) + + +class PermissionsWithRawResponse: + def __init__(self, permissions: Permissions) -> None: + self._permissions = permissions + + self.create = _legacy_response.to_raw_response_wrapper( + permissions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + permissions.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + permissions.delete, + ) + + +class AsyncPermissionsWithRawResponse: + def __init__(self, permissions: AsyncPermissions) -> None: + self._permissions = permissions + + self.create = _legacy_response.async_to_raw_response_wrapper( + permissions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + permissions.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + permissions.delete, + ) + + +class PermissionsWithStreamingResponse: + def __init__(self, permissions: Permissions) -> None: + self._permissions = permissions + + self.create = to_streamed_response_wrapper( + permissions.create, + ) + self.retrieve = to_streamed_response_wrapper( + permissions.retrieve, + ) + 
self.delete = to_streamed_response_wrapper( + permissions.delete, + ) + + +class AsyncPermissionsWithStreamingResponse: + def __init__(self, permissions: AsyncPermissions) -> None: + self._permissions = permissions + + self.create = async_to_streamed_response_wrapper( + permissions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + permissions.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + permissions.delete, + ) diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py index 2e5f36e546..25ae3e8cf4 100644 --- a/src/openai/resources/fine_tuning/fine_tuning.py +++ b/src/openai/resources/fine_tuning/fine_tuning.py @@ -1,43 +1,166 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING - -from .jobs import Jobs, AsyncJobs, JobsWithRawResponse, AsyncJobsWithRawResponse +from ..._compat import cached_property +from .jobs.jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) from ..._resource import SyncAPIResource, AsyncAPIResource - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from .alpha.alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .checkpoints.checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) __all__ = ["FineTuning", "AsyncFineTuning"] class FineTuning(SyncAPIResource): - jobs: Jobs - with_raw_response: FineTuningWithRawResponse + @cached_property + def jobs(self) -> Jobs: + return Jobs(self._client) + + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + + @cached_property + def alpha(self) -> Alpha: + return Alpha(self._client) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.jobs = Jobs(client) - self.with_raw_response = FineTuningWithRawResponse(self) + @cached_property + def with_raw_response(self) -> FineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FineTuningWithStreamingResponse(self) class AsyncFineTuning(AsyncAPIResource): - jobs: AsyncJobs - with_raw_response: AsyncFineTuningWithRawResponse + @cached_property + def jobs(self) -> AsyncJobs: + return AsyncJobs(self._client) + + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.jobs = AsyncJobs(client) - self.with_raw_response = AsyncFineTuningWithRawResponse(self) + @cached_property + def alpha(self) -> AsyncAlpha: + return AsyncAlpha(self._client) + + @cached_property + def with_raw_response(self) -> AsyncFineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFineTuningWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFineTuningWithStreamingResponse(self) class FineTuningWithRawResponse: def __init__(self, fine_tuning: FineTuning) -> None: - self.jobs = JobsWithRawResponse(fine_tuning.jobs) + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithRawResponse: + return JobsWithRawResponse(self._fine_tuning.jobs) + + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AlphaWithRawResponse: + return AlphaWithRawResponse(self._fine_tuning.alpha) class AsyncFineTuningWithRawResponse: def __init__(self, fine_tuning: AsyncFineTuning) -> None: - self.jobs = AsyncJobsWithRawResponse(fine_tuning.jobs) + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithRawResponse: + return AsyncJobsWithRawResponse(self._fine_tuning.jobs) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AsyncAlphaWithRawResponse: + return AsyncAlphaWithRawResponse(self._fine_tuning.alpha) + + +class FineTuningWithStreamingResponse: + def __init__(self, fine_tuning: FineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> JobsWithStreamingResponse: + return JobsWithStreamingResponse(self._fine_tuning.jobs) + + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AlphaWithStreamingResponse: + return AlphaWithStreamingResponse(self._fine_tuning.alpha) + + +class AsyncFineTuningWithStreamingResponse: + def __init__(self, fine_tuning: AsyncFineTuning) -> None: + self._fine_tuning = fine_tuning + + @cached_property + def jobs(self) -> AsyncJobsWithStreamingResponse: + return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return 
AsyncCheckpointsWithStreamingResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AsyncAlphaWithStreamingResponse: + return AsyncAlphaWithStreamingResponse(self._fine_tuning.alpha) diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..94cd1fb7e7 --- /dev/null +++ b/src/openai/resources/fine_tuning/jobs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .jobs import ( + Jobs, + AsyncJobs, + JobsWithRawResponse, + AsyncJobsWithRawResponse, + JobsWithStreamingResponse, + AsyncJobsWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) + +__all__ = [ + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Jobs", + "AsyncJobs", + "JobsWithRawResponse", + "AsyncJobsWithRawResponse", + "JobsWithStreamingResponse", + "AsyncJobsWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py new file mode 100644 index 0000000000..f86462e513 --- /dev/null +++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py @@ -0,0 +1,199 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning.jobs import checkpoint_list_params +from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[FineTuningJobCheckpoint]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=SyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCheckpointsWithStreamingResponse(self) + + def list( + self, + fine_tuning_job_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]: + """ + List checkpoints for a fine-tuning job. + + Args: + after: Identifier for the last checkpoint ID from the previous pagination request. + + limit: Number of checkpoints to retrieve. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._get_api_list( + f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints", + page=AsyncCursorPage[FineTuningJobCheckpoint], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + checkpoint_list_params.CheckpointListParams, + ), + ), + model=FineTuningJobCheckpoint, + ) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.to_raw_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = _legacy_response.async_to_raw_response_wrapper( + checkpoints.list, + ) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + self.list = to_streamed_response_wrapper( + checkpoints.list, + ) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + self.list = async_to_streamed_response_wrapper( + checkpoints.list, + ) diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py similarity index 52% rename from src/openai/resources/fine_tuning/jobs.py rename to src/openai/resources/fine_tuning/jobs/jobs.py index 3d9aed8d91..5cca219172 100644 --- a/src/openai/resources/fine_tuning/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -1,45 +1,73 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, Union, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Literal import httpx -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper -from ...pagination import SyncCursorPage, AsyncCursorPage -from ..._base_client import AsyncPaginator, make_request_options -from ...types.fine_tuning import ( - FineTuningJob, - FineTuningJobEvent, - job_list_params, - job_create_params, - job_list_events_params, +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, ) - -if TYPE_CHECKING: - from ..._client import OpenAI, AsyncOpenAI +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import ( + AsyncPaginator, + make_request_options, +) +from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params +from ....types.shared_params.metadata import Metadata +from ....types.fine_tuning.fine_tuning_job import FineTuningJob +from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent __all__ = ["Jobs", "AsyncJobs"] class Jobs(SyncAPIResource): - with_raw_response: JobsWithRawResponse + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + + @cached_property + def with_raw_response(self) -> JobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return JobsWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = JobsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> JobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return JobsWithStreamingResponse(self) def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -50,7 +78,8 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: """ - Creates a job that fine-tunes a specified model from a given dataset. + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. @@ -59,26 +88,49 @@ def create( Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). 
training_file: The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + + integrations: A list of integrations to enable for your fine-tuning job. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. @@ -108,6 +160,10 @@ def create( "model": model, "training_file": training_file, "hyperparameters": hyperparameters, + "integrations": integrations, + "metadata": metadata, + "method": method, + "seed": seed, "suffix": suffix, "validation_file": validation_file, }, @@ -144,6 +200,8 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}", options=make_request_options( @@ -157,6 +215,7 @@ def list( *, after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -172,6 +231,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -192,6 +254,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), @@ -222,6 +285,8 @@ def cancel( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return self._post( f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", options=make_request_options( @@ -259,6 +324,8 @@ def list_events( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return self._get_api_list( f"/fine_tuning/jobs/{fine_tuning_job_id}/events", page=SyncCursorPage[FineTuningJobEvent], @@ -278,20 +345,107 @@ def list_events( model=FineTuningJobEvent, ) + def pause( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Pause a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/pause", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def resume( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Resume a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/resume", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + class AsyncJobs(AsyncAPIResource): - with_raw_response: AsyncJobsWithRawResponse + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) + + @cached_property + def with_raw_response(self) -> AsyncJobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncJobsWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncJobsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncJobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncJobsWithStreamingResponse(self) async def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, + integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -302,7 +456,8 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: """ - Creates a job that fine-tunes a specified model from a given dataset. + Creates a fine-tuning job which begins the process of creating a new model from + a given dataset. Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. @@ -311,26 +466,49 @@ async def create( Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). training_file: The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. 
+ The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + + integrations: A list of integrations to enable for your fine-tuning job. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + + seed: The seed controls the reproducibility of the job. Passing in the same seed and + job parameters should produce the same results, but may differ in rare cases. If + a seed is not specified, one will be generated for you. + + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. @@ -355,11 +533,15 @@ async def create( """ return await self._post( "/fine_tuning/jobs", - body=maybe_transform( + body=await async_maybe_transform( { "model": model, "training_file": training_file, "hyperparameters": hyperparameters, + "integrations": integrations, + "metadata": metadata, + "method": method, + "seed": seed, "suffix": suffix, "validation_file": validation_file, }, @@ -396,6 +578,8 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return await self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}", options=make_request_options( @@ -409,6 +593,7 @@ def list( *, after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -424,6 +609,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -444,6 +632,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), @@ -474,6 +663,8 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return await self._post( f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", options=make_request_options( @@ -511,6 +702,8 @@ def list_events( timeout: Override the client-level default timeout for this request, in seconds """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") return self._get_api_list( f"/fine_tuning/jobs/{fine_tuning_job_id}/events", page=AsyncCursorPage[FineTuningJobEvent], @@ -530,40 +723,192 @@ def list_events( model=FineTuningJobEvent, ) + async def pause( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Pause a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/pause", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + async def resume( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + """ + Resume a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/resume", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + class JobsWithRawResponse: def __init__(self, jobs: Jobs) -> None: - self.create = to_raw_response_wrapper( + self._jobs = jobs + + self.create = _legacy_response.to_raw_response_wrapper( jobs.create, ) - self.retrieve = to_raw_response_wrapper( + self.retrieve = _legacy_response.to_raw_response_wrapper( jobs.retrieve, ) - self.list = to_raw_response_wrapper( + self.list = _legacy_response.to_raw_response_wrapper( jobs.list, ) - self.cancel = to_raw_response_wrapper( + self.cancel = _legacy_response.to_raw_response_wrapper( jobs.cancel, ) - self.list_events = to_raw_response_wrapper( + self.list_events = _legacy_response.to_raw_response_wrapper( jobs.list_events, ) + self.pause = _legacy_response.to_raw_response_wrapper( + jobs.pause, + ) + self.resume = _legacy_response.to_raw_response_wrapper( + jobs.resume, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._jobs.checkpoints) class AsyncJobsWithRawResponse: def __init__(self, jobs: AsyncJobs) -> None: - self.create = async_to_raw_response_wrapper( + self._jobs = jobs + + self.create = _legacy_response.async_to_raw_response_wrapper( + jobs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + jobs.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + jobs.cancel, + ) + self.list_events = _legacy_response.async_to_raw_response_wrapper( + jobs.list_events, + ) + self.pause = _legacy_response.async_to_raw_response_wrapper( + jobs.pause, + ) + self.resume = _legacy_response.async_to_raw_response_wrapper( + jobs.resume, + ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints) + + +class JobsWithStreamingResponse: + def __init__(self, jobs: Jobs) -> None: + self._jobs = jobs + + self.create = to_streamed_response_wrapper( + jobs.create, + ) + self.retrieve = to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = to_streamed_response_wrapper( + jobs.list, + ) + self.cancel = to_streamed_response_wrapper( + jobs.cancel, + ) + self.list_events = to_streamed_response_wrapper( + jobs.list_events, + ) + self.pause = to_streamed_response_wrapper( + jobs.pause, + ) + self.resume = to_streamed_response_wrapper( + jobs.resume, + ) + + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._jobs.checkpoints) + + +class AsyncJobsWithStreamingResponse: + def __init__(self, jobs: AsyncJobs) -> None: + self._jobs = jobs + + self.create = async_to_streamed_response_wrapper( jobs.create, ) - self.retrieve = async_to_raw_response_wrapper( + self.retrieve = async_to_streamed_response_wrapper( jobs.retrieve, ) - self.list = async_to_raw_response_wrapper( + 
self.list = async_to_streamed_response_wrapper( jobs.list, ) - self.cancel = async_to_raw_response_wrapper( + self.cancel = async_to_streamed_response_wrapper( jobs.cancel, ) - self.list_events = async_to_raw_response_wrapper( + self.list_events = async_to_streamed_response_wrapper( jobs.list_events, ) + self.pause = async_to_streamed_response_wrapper( + jobs.pause, + ) + self.resume = async_to_streamed_response_wrapper( + jobs.resume, + ) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints) diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 94b1bc1fc8..524bebacae 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -1,42 +1,51 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, Union, Mapping, Optional, cast +from typing import List, Union, Mapping, Optional, cast from typing_extensions import Literal import httpx -from ..types import ( - ImagesResponse, - image_edit_params, - image_generate_params, - image_create_variation_params, -) +from .. import _legacy_response +from ..types import image_edit_params, image_generate_params, image_create_variation_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import extract_files, maybe_transform, deepcopy_minimal +from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._base_client import make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from ..types.image_model import ImageModel +from ..types.images_response import ImagesResponse __all__ = ["Images", "AsyncImages"] class Images(SyncAPIResource): - with_raw_response: ImagesWithRawResponse + @cached_property + def with_raw_response(self) -> ImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ImagesWithRawResponse(self) - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = ImagesWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> ImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ImagesWithStreamingResponse(self) def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -48,8 +57,9 @@ def create_variation( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImagesResponse: - """ - Creates a variation of a given image. + """Creates a variation of a given image. + + This endpoint only supports `dall-e-2`. Args: image: The image to use as the basis for the variation(s). Must be a valid PNG file, @@ -58,18 +68,18 @@ def create_variation( model: The model to use for image generation. Only `dall-e-2` is supported at this time. - n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only - `n=1` is supported. + n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -90,12 +100,10 @@ def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/variations", body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), @@ -109,13 +117,16 @@ def create_variation( def edit( self, *, - image: FileTypes, + image: Union[FileTypes, List[FileTypes]], prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN, mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] + | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -124,34 +135,58 @@ def edit( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImagesResponse: - """ - Creates an edited or extended image given an original image and a prompt. + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. Args: - image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask - is not provided, image must have transparency, which will be used as the mask. + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 25MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. prompt: A text description of the desired image(s). The maximum length is 1000 - characters. + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) - indicate where `image` should be edited. Must be a valid PNG file, less than + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. - model: The model to use for image generation. Only `dall-e-2` is supported at this - time. + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. n: The number of images to generate. Must be between 1 and 10. 
+ quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024`. + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -165,21 +200,21 @@ def edit( { "image": image, "prompt": prompt, + "background": background, "mask": mask, "model": model, "n": n, + "quality": quality, "response_format": response_format, "size": size, "user": user, } ) - files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/edits", body=maybe_transform(body, image_edit_params.ImageEditParams), @@ -194,11 +229,18 @@ def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + moderation: Optional[Literal["low", "auto"]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, - quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + output_compression: Optional[int] | NotGiven = NOT_GIVEN, + output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | NotGiven = NOT_GIVEN, style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
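# --- Illustrative usage sketch (not part of the generated diff) ---
# The images.py hunks above add `gpt-image-1` support plus new parameters such as
# `background`, `quality`, `output_format`, `output_compression`, and `moderation`
# to `images.generate` / `images.edit`. A minimal example of how the updated surface
# might be called is sketched below; the output file name, the prompt text, and the
# assumption that OPENAI_API_KEY is set in the environment are placeholders, not
# anything taken from this patch.
import base64

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

result = client.images.generate(
    model="gpt-image-1",
    prompt="A pixel-art spaceship on a transparent background",
    background="transparent",  # gpt-image-1-only parameter added in this diff
    quality="high",            # `high` / `medium` / `low` are gpt-image-1 values
    output_format="png",       # gpt-image-1 always returns base64-encoded images
    size="1024x1024",
)

# gpt-image-1 responses carry the image bytes in `b64_json` rather than a URL.
with open("sprite.png", "wb") as f:
    f.write(base64.b64decode(result.data[0].b64_json))
# --- end of illustrative sketch; diff resumes below ---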
@@ -210,35 +252,64 @@ def generate( ) -> ImagesResponse: """ Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). Args: - prompt: A text description of the desired image(s). The maximum length is 1000 - characters for `dall-e-2` and 4000 characters for `dall-e-3`. + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. - model: The model to use for image generation. + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - quality: The quality of the image that will be generated. `hd` creates images with finer - details and greater consistency across the image. This param is only supported - for `dall-e-3`. + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. - response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or - `1024x1792` for `dall-e-3` models. + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. - style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid - causes the model to lean towards generating hyper-real and dramatic images. - Natural causes the model to produce more natural, less hyper-real looking - images. This param is only supported for `dall-e-3`. 
+ style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -253,8 +324,12 @@ def generate( body=maybe_transform( { "prompt": prompt, + "background": background, "model": model, + "moderation": moderation, "n": n, + "output_compression": output_compression, + "output_format": output_format, "quality": quality, "response_format": response_format, "size": size, @@ -271,17 +346,30 @@ def generate( class AsyncImages(AsyncAPIResource): - with_raw_response: AsyncImagesWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncImagesWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncImagesWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncImagesWithStreamingResponse(self) async def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -293,8 +381,9 @@ async def create_variation( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImagesResponse: - """ - Creates a variation of a given image. + """Creates a variation of a given image. + + This endpoint only supports `dall-e-2`. Args: image: The image to use as the basis for the variation(s). Must be a valid PNG file, @@ -303,18 +392,18 @@ async def create_variation( model: The model to use for image generation. Only `dall-e-2` is supported at this time. - n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only - `n=1` is supported. + n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. size: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -335,15 +424,13 @@ async def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/variations", - body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), + body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -354,13 +441,16 @@ async def create_variation( async def edit( self, *, - image: FileTypes, + image: Union[FileTypes, List[FileTypes]], prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN, mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] + | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -369,34 +459,58 @@ async def edit( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImagesResponse: - """ - Creates an edited or extended image given an original image and a prompt. + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. Args: - image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask - is not provided, image must have transparency, which will be used as the mask. + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 25MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. prompt: A text description of the desired image(s). The maximum length is 1000 - characters. + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). 
When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) - indicate where `image` should be edited. Must be a valid PNG file, less than + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. - model: The model to use for image generation. Only `dall-e-2` is supported at this - time. + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. n: The number of images to generate. Must be between 1 and 10. + quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024`. + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -410,24 +524,24 @@ async def edit( { "image": image, "prompt": prompt, + "background": background, "mask": mask, "model": model, "n": n, + "quality": quality, "response_format": response_format, "size": size, "user": user, } ) - files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/edits", - body=maybe_transform(body, image_edit_params.ImageEditParams), + body=await async_maybe_transform(body, image_edit_params.ImageEditParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -439,11 +553,18 @@ async def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, + moderation: Optional[Literal["low", "auto"]] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, - quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, + output_compression: Optional[int] | NotGiven = NOT_GIVEN, + output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | NotGiven = NOT_GIVEN, style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -455,35 +576,64 @@ async def generate( ) -> ImagesResponse: """ Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). Args: - prompt: A text description of the desired image(s). The maximum length is 1000 - characters for `dall-e-2` and 4000 characters for `dall-e-3`. + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. - model: The model to use for image generation. + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - quality: The quality of the image that will be generated. `hd` creates images with finer - details and greater consistency across the image. This param is only supported - for `dall-e-3`. + output_compression: The compression level (0-100%) for the generated images. 
This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. - response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or - `1024x1792` for `dall-e-3` models. + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. - style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid - causes the model to lean towards generating hyper-real and dramatic images. - Natural causes the model to produce more natural, less hyper-real looking - images. This param is only supported for `dall-e-3`. + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
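
To make the new `gpt-image-1` generation parameters documented above concrete, here is a minimal usage sketch (not part of this diff), shown with the synchronous client for brevity. It assumes `OPENAI_API_KEY` is set in the environment; the prompt and output filename are placeholders.

# Illustrative sketch only: generate an image with the new gpt-image-1
# parameters (background, output_format, quality, size) described above.
import base64

from openai import OpenAI

client = OpenAI()

result = client.images.generate(
    model="gpt-image-1",
    prompt="A watercolor painting of a lighthouse at dawn",
    background="transparent",   # transparency requires png or webp output
    output_format="png",
    quality="high",
    size="1024x1536",           # portrait
)

# Per the docstring, gpt-image-1 always returns base64-encoded images.
image_bytes = base64.b64decode(result.data[0].b64_json)
with open("lighthouse.png", "wb") as f:  # placeholder filename
    f.write(image_bytes)
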
extra_headers: Send extra headers @@ -495,11 +645,15 @@ async def generate( """ return await self._post( "/images/generations", - body=maybe_transform( + body=await async_maybe_transform( { "prompt": prompt, + "background": background, "model": model, + "moderation": moderation, "n": n, + "output_compression": output_compression, + "output_format": output_format, "quality": quality, "response_format": response_format, "size": size, @@ -517,25 +671,59 @@ async def generate( class ImagesWithRawResponse: def __init__(self, images: Images) -> None: - self.create_variation = to_raw_response_wrapper( + self._images = images + + self.create_variation = _legacy_response.to_raw_response_wrapper( images.create_variation, ) - self.edit = to_raw_response_wrapper( + self.edit = _legacy_response.to_raw_response_wrapper( images.edit, ) - self.generate = to_raw_response_wrapper( + self.generate = _legacy_response.to_raw_response_wrapper( images.generate, ) class AsyncImagesWithRawResponse: def __init__(self, images: AsyncImages) -> None: - self.create_variation = async_to_raw_response_wrapper( + self._images = images + + self.create_variation = _legacy_response.async_to_raw_response_wrapper( + images.create_variation, + ) + self.edit = _legacy_response.async_to_raw_response_wrapper( + images.edit, + ) + self.generate = _legacy_response.async_to_raw_response_wrapper( + images.generate, + ) + + +class ImagesWithStreamingResponse: + def __init__(self, images: Images) -> None: + self._images = images + + self.create_variation = to_streamed_response_wrapper( + images.create_variation, + ) + self.edit = to_streamed_response_wrapper( + images.edit, + ) + self.generate = to_streamed_response_wrapper( + images.generate, + ) + + +class AsyncImagesWithStreamingResponse: + def __init__(self, images: AsyncImages) -> None: + self._images = images + + self.create_variation = async_to_streamed_response_wrapper( images.create_variation, ) - self.edit = async_to_raw_response_wrapper( + self.edit = async_to_streamed_response_wrapper( images.edit, ) - self.generate = async_to_raw_response_wrapper( + self.generate = async_to_streamed_response_wrapper( images.generate, ) diff --git a/src/openai/resources/models.py b/src/openai/resources/models.py index 2d04bdc5cc..a9693a6b0a 100644 --- a/src/openai/resources/models.py +++ b/src/openai/resources/models.py @@ -1,30 +1,44 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING - import httpx -from ..types import Model, ModelDeleted +from .. 
import _legacy_response from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncPage, AsyncPage -from .._base_client import AsyncPaginator, make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from ..types.model import Model +from .._base_client import ( + AsyncPaginator, + make_request_options, +) +from ..types.model_deleted import ModelDeleted __all__ = ["Models", "AsyncModels"] class Models(SyncAPIResource): - with_raw_response: ModelsWithRawResponse + @cached_property + def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = ModelsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ModelsWithStreamingResponse(self) def retrieve( self, @@ -50,6 +64,8 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return self._get( f"/models/{model}", options=make_request_options( @@ -106,6 +122,8 @@ def delete( timeout: Override the client-level default timeout for this request, in seconds """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return self._delete( f"/models/{model}", options=make_request_options( @@ -116,11 +134,24 @@ def delete( class AsyncModels(AsyncAPIResource): - with_raw_response: AsyncModelsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncModelsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncModelsWithStreamingResponse(self) async def retrieve( self, @@ -146,6 +177,8 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return await self._get( f"/models/{model}", options=make_request_options( @@ -202,6 +235,8 @@ async def delete( timeout: Override the client-level default timeout for this request, in seconds """ + if not model: + raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") return await self._delete( f"/models/{model}", options=make_request_options( @@ -213,25 +248,59 @@ async def delete( class ModelsWithRawResponse: def __init__(self, models: Models) -> None: - self.retrieve = to_raw_response_wrapper( + self._models = models + + self.retrieve = _legacy_response.to_raw_response_wrapper( models.retrieve, ) - self.list = to_raw_response_wrapper( + self.list = _legacy_response.to_raw_response_wrapper( models.list, ) - self.delete = to_raw_response_wrapper( + self.delete = _legacy_response.to_raw_response_wrapper( models.delete, ) class AsyncModelsWithRawResponse: def __init__(self, models: AsyncModels) -> None: - self.retrieve = async_to_raw_response_wrapper( + self._models = models + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + models.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + models.delete, + ) + + +class ModelsWithStreamingResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = to_streamed_response_wrapper( + models.retrieve, + ) + self.list = to_streamed_response_wrapper( + models.list, + ) + self.delete = to_streamed_response_wrapper( + models.delete, + ) + + +class AsyncModelsWithStreamingResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = async_to_streamed_response_wrapper( models.retrieve, ) - self.list = async_to_raw_response_wrapper( + self.list = async_to_streamed_response_wrapper( models.list, ) - self.delete = async_to_raw_response_wrapper( + self.delete = async_to_streamed_response_wrapper( models.delete, ) diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py index 12a7c68a7b..f7a8b52c23 100644 --- a/src/openai/resources/moderations.py +++ b/src/openai/resources/moderations.py @@ -1,37 +1,51 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import TYPE_CHECKING, List, Union -from typing_extensions import Literal +from typing import List, Union, Iterable import httpx -from ..types import ModerationCreateResponse, moderation_create_params +from .. 
import _legacy_response +from ..types import moderation_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._base_client import make_request_options - -if TYPE_CHECKING: - from .._client import OpenAI, AsyncOpenAI +from ..types.moderation_model import ModerationModel +from ..types.moderation_create_response import ModerationCreateResponse +from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["Moderations", "AsyncModerations"] class Moderations(SyncAPIResource): - with_raw_response: ModerationsWithRawResponse + @cached_property + def with_raw_response(self) -> ModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. - def __init__(self, client: OpenAI) -> None: - super().__init__(client) - self.with_raw_response = ModerationsWithRawResponse(self) + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ModerationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ModerationsWithStreamingResponse(self) def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -39,20 +53,19 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: - """ - Classifies if text violates OpenAI's Content Policy + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. 
Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). extra_headers: Send extra headers @@ -79,17 +92,30 @@ def create( class AsyncModerations(AsyncAPIResource): - with_raw_response: AsyncModerationsWithRawResponse + @cached_property + def with_raw_response(self) -> AsyncModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncModerationsWithRawResponse(self) - def __init__(self, client: AsyncOpenAI) -> None: - super().__init__(client) - self.with_raw_response = AsyncModerationsWithRawResponse(self) + @cached_property + def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncModerationsWithStreamingResponse(self) async def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -97,20 +123,19 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: - """ - Classifies if text violates OpenAI's Content Policy + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). 
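
As a sketch of the widened `input` type (`ModerationMultiModalInputParam`) and the `ModerationModel` parameter, a multi-modal moderation call might look like the following. The model name and the exact input-object shape are assumptions and are not spelled out in this diff.

# Illustrative sketch only: moderate text and an image in a single request.
from openai import OpenAI

client = OpenAI()

result = client.moderations.create(
    model="omni-moderation-latest",  # assumed multi-modal-capable model
    input=[
        {"type": "text", "text": "Is this content safe to publish?"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image-to-check.png"},
        },
    ],
)

for moderation in result.results:
    print(moderation.flagged, moderation.categories)
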
extra_headers: Send extra headers @@ -122,7 +147,7 @@ async def create( """ return await self._post( "/moderations", - body=maybe_transform( + body=await async_maybe_transform( { "input": input, "model": model, @@ -138,13 +163,35 @@ async def create( class ModerationsWithRawResponse: def __init__(self, moderations: Moderations) -> None: - self.create = to_raw_response_wrapper( + self._moderations = moderations + + self.create = _legacy_response.to_raw_response_wrapper( moderations.create, ) class AsyncModerationsWithRawResponse: def __init__(self, moderations: AsyncModerations) -> None: - self.create = async_to_raw_response_wrapper( + self._moderations = moderations + + self.create = _legacy_response.async_to_raw_response_wrapper( + moderations.create, + ) + + +class ModerationsWithStreamingResponse: + def __init__(self, moderations: Moderations) -> None: + self._moderations = moderations + + self.create = to_streamed_response_wrapper( + moderations.create, + ) + + +class AsyncModerationsWithStreamingResponse: + def __init__(self, moderations: AsyncModerations) -> None: + self._moderations = moderations + + self.create = async_to_streamed_response_wrapper( moderations.create, ) diff --git a/src/openai/resources/responses/__init__.py b/src/openai/resources/responses/__init__.py new file mode 100644 index 0000000000..ad19218b01 --- /dev/null +++ b/src/openai/resources/responses/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) + +__all__ = [ + "InputItems", + "AsyncInputItems", + "InputItemsWithRawResponse", + "AsyncInputItemsWithRawResponse", + "InputItemsWithStreamingResponse", + "AsyncInputItemsWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", +] diff --git a/src/openai/resources/responses/input_items.py b/src/openai/resources/responses/input_items.py new file mode 100644 index 0000000000..ee0e628169 --- /dev/null +++ b/src/openai/resources/responses/input_items.py @@ -0,0 +1,234 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, cast +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.responses import input_item_list_params +from ...types.responses.response_item import ResponseItem +from ...types.responses.response_includable import ResponseIncludable + +__all__ = ["InputItems", "AsyncInputItems"] + + +class InputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> InputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return InputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> InputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return InputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ResponseItem]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
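
A minimal sketch of the new endpoint, using the pagination parameters documented above; `resp_123` is a placeholder for a previously stored response ID.

# Illustrative sketch only: list input items for a stored response.
from openai import OpenAI

client = OpenAI()

page = client.responses.input_items.list(
    "resp_123",       # placeholder response ID
    limit=20,
    order="desc",
)

# Iterating the returned page yields items, fetching further pages as needed.
for item in page:
    print(item.type)
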
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=SyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "include": include, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class AsyncInputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncInputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncInputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncInputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncInputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ResponseItem, AsyncCursorPage[ResponseItem]]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
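
The async resource mirrors this. A short sketch of consuming the returned `AsyncPaginator` with `async for`, again with a placeholder response ID:

# Illustrative sketch only: async auto-pagination over input items.
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async for item in client.responses.input_items.list("resp_123", order="asc"):
        print(item.type)


asyncio.run(main())
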
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=AsyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "include": include, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class InputItemsWithRawResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.to_raw_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithRawResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.async_to_raw_response_wrapper( + input_items.list, + ) + + +class InputItemsWithStreamingResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = to_streamed_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithStreamingResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = async_to_streamed_response_wrapper( + input_items.list, + ) diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py new file mode 100644 index 0000000000..a905bc34b1 --- /dev/null +++ b/src/openai/resources/responses/responses.py @@ -0,0 +1,1938 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, Type, Union, Iterable, Optional, cast +from functools import partial +from typing_extensions import Literal, overload + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import is_given, required_args, maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from ..._streaming import Stream, AsyncStream +from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool +from ..._base_client import make_request_options +from ...types.responses import response_create_params, response_retrieve_params +from ...lib._parsing._responses import ( + TextFormatT, + parse_response, + type_to_text_format_param as _type_to_text_format_param, +) +from ...types.shared.chat_model import ChatModel +from ...types.responses.response import Response +from ...types.responses.tool_param import ToolParam, ParseableToolParam +from ...types.shared_params.metadata import Metadata +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.parsed_response import ParsedResponse +from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager +from ...types.responses.response_includable import ResponseIncludable +from ...types.shared_params.responses_model import ResponsesModel +from ...types.responses.response_input_param import ResponseInputParam +from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_text_config_param import ResponseTextConfigParam + +__all__ = ["Responses", "AsyncResponses"] + + +class Responses(SyncAPIResource): + @cached_property + def input_items(self) -> InputItems: + return InputItems(self._client) + + @cached_property + def with_raw_response(self) -> ResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ResponsesWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. 
+ - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. 
Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
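
The overloads above differ mainly in how `stream` is passed: omitted or `False` returns a `Response`, while `stream=True` returns a `Stream[ResponseStreamEvent]`. A minimal sketch of both call shapes follows; the model and prompt are placeholders, and the printed fields are kept deliberately small.

# Illustrative sketch only: non-streaming and streaming Responses calls.
from openai import OpenAI

client = OpenAI()

# Non-streaming: returns a Response object once generation has finished.
response = client.responses.create(
    model="gpt-4o",
    input="Write a one-sentence summary of the Responses API.",
)
print(response.id, response.status)

# Streaming: yields server-sent events as the response is generated.
stream = client.responses.create(
    model="gpt-4o",
    input="Write a one-sentence summary of the Responses API.",
    stream=True,
)
for event in stream:
    print(event.type)
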
+ + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. 
+ + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "service_tier": service_tier, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request: partial[Stream[ResponseStreamEvent]] = partial( + self.create, + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return ResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams), + ), + cast_to=Response, + ) + + def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncResponses(AsyncAPIResource): + @cached_property + def input_items(self) -> AsyncInputItems: + return AsyncInputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncResponsesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. 
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
+ store: Whether to store the generated model response for later retrieval via API.
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using
+ [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+ See the
+ [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+ for more information.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ text: Configuration options for a text response from the model. Can be plain text or
+ structured JSON data. Learn more:
+
+ - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+ - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+ tool_choice: How the model should select which tool (or tools) to use when generating a
+ response. See the `tools` parameter to see how to specify which tools the model
+ can call.
+
+ tools: An array of tools the model may call while generating a response. You can
+ specify which tool to use by setting the `tool_choice` parameter.
+
+ The two categories of tools you can provide the model are:
+
+ - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+ capabilities, like
+ [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+ [file search](https://platform.openai.com/docs/guides/tools-file-search).
+ Learn more about
+ [built-in tools](https://platform.openai.com/docs/guides/tools).
+ - **Function calls (custom tools)**: Functions that are defined by you, enabling
+ the model to call your own code. Learn more about
+ [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+ model considers the results of the tokens with top_p probability mass. So 0.1
+ means only the tokens comprising the top 10% probability mass are considered.
+
+ We generally recommend altering this or `temperature` but not both.
+
+ truncation: The truncation strategy to use for the model response.
+
+ - `auto`: If the context of this response and previous ones exceeds the model's
+ context window size, the model will truncate the response to fit the context
+ window by dropping input items in the middle of the conversation.
+ - `disabled` (default): If a model response will exceed the context window size
+ for a model, the request will fail with a 400 error.
+
+ user: A unique identifier representing your end-user, which can help OpenAI to monitor
+ and detect abuse.
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. 
+
+ stream: If set to true, the model response data will be streamed to the client as it is
+ generated using
+ [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+ See the
+ [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+ for more information.
+
+ include: Specify additional output data to include in the model response. Currently
+ supported values are:
+
+ - `file_search_call.results`: Include the search results of the file search tool
+ call.
+ - `message.input_image.image_url`: Include image urls from the input message.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
+ - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
+ tokens in reasoning item outputs. This enables reasoning items to be used in
+ multi-turn conversations when using the Responses API statelessly (like when
+ the `store` parameter is set to `false`, or when an organization is enrolled
+ in the zero data retention program).
+
+ instructions: Inserts a system (or developer) message as the first item in the model's
+ context.
+
+ When using along with `previous_response_id`, the instructions from a previous
+ response will not be carried over to the next response. This makes it simple to
+ swap out system (or developer) messages in new responses.
+
+ max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+ including visible output tokens and
+ [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+ reasoning: **o-series models only**
+
+ Configuration options for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
+ store: Whether to store the generated model response for later retrieval via API.
+
+ temperature: What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
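For context, a minimal sketch of how this streaming overload is typically consumed (assumptions: an `AsyncOpenAI` client reading `OPENAI_API_KEY` from the environment, and placeholder model/prompt values):

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # passing stream=True selects this overload and yields an AsyncStream[ResponseStreamEvent]
    stream = await client.responses.create(
        model="gpt-4o",
        input="Write a one-sentence bedtime story about a unicorn.",
        stream=True,
    )
    async for event in stream:
        # each server-sent event is a typed ResponseStreamEvent; dispatch on its `type` field
        print(event.type)

asyncio.run(main())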
+ + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. 
Currently
+ supported values are:
+
+ - `file_search_call.results`: Include the search results of the file search tool
+ call.
+ - `message.input_image.image_url`: Include image urls from the input message.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
+ - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
+ tokens in reasoning item outputs. This enables reasoning items to be used in
+ multi-turn conversations when using the Responses API statelessly (like when
+ the `store` parameter is set to `false`, or when an organization is enrolled
+ in the zero data retention program).
+
+ instructions: Inserts a system (or developer) message as the first item in the model's
+ context.
+
+ When using along with `previous_response_id`, the instructions from a previous
+ response will not be carried over to the next response. This makes it simple to
+ swap out system (or developer) messages in new responses.
+
+ max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+ including visible output tokens and
+ [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+ reasoning: **o-series models only**
+
+ Configuration options for
+ [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
+ store: Whether to store the generated model response for later retrieval via API.
+
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+ make the output more random, while lower values like 0.2 will make it more
+ focused and deterministic. We generally recommend altering this or `top_p` but
+ not both.
+
+ text: Configuration options for a text response from the model. Can be plain text or
+ structured JSON data.
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
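Likewise, a hedged sketch of the non-streaming path that this combined `stream: bool` overload covers, together with the `retrieve` and `delete` helpers defined further below (the model name, prompt, and printed fields are illustrative placeholders):

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # omitting `stream` (or passing stream=False) resolves to a plain `Response` object
    response = await client.responses.create(
        model="gpt-4o",
        input="Summarize the plot of Hamlet in one sentence.",
    )
    print(response.id, response.output_text)

    # stored responses can later be fetched or removed by id
    fetched = await client.responses.retrieve(response.id)
    print(fetched.status)
    await client.responses.delete(response.id)

asyncio.run(main())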
+ + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: ResponsesModel, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + return await self._post( + "/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "service_tier": service_tier, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + def stream( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: 
Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncResponseStreamManager[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + api_request = self.create( + input=input, + model=model, + tools=tools, + include=include, + instructions=instructions, + max_output_tokens=max_output_tokens, + metadata=metadata, + parallel_tool_calls=parallel_tool_calls, + previous_response_id=previous_response_id, + store=store, + stream=True, + temperature=temperature, + text=text, + tool_choice=tool_choice, + reasoning=reasoning, + top_p=top_p, + truncation=truncation, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + return AsyncResponseStreamManager( + api_request, + text_format=text_format, + input_tools=tools, + ) + + async def parse( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + text_format: type[TextFormatT] | NotGiven = NOT_GIVEN, + tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ParsedResponse[TextFormatT]: + if is_given(text_format): + if not text: + text = {} + + if "format" in text: + raise TypeError("Cannot mix and match text.format with text_format") + + text["format"] = _type_to_text_format_param(text_format) + + tools = _make_tools(tools) + + def parser(raw_response: Response) -> ParsedResponse[TextFormatT]: + return parse_response( + input_tools=tools, + text_format=text_format, + response=raw_response, + ) + + return await self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=parser, + ), + # we turn the `Response` instance into a `ParsedResponse` + # in the `parser` function above + cast_to=cast(Type[ParsedResponse[TextFormatT]], Response), + ) + + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"include": include}, response_retrieve_params.ResponseRetrieveParams + ), + ), + cast_to=Response, + ) + + async def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class ResponsesWithRawResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = _legacy_response.to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithRawResponse: + return InputItemsWithRawResponse(self._responses.input_items) + + +class AsyncResponsesWithRawResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = _legacy_response.async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithRawResponse: + return AsyncInputItemsWithRawResponse(self._responses.input_items) + + +class ResponsesWithStreamingResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithStreamingResponse: + return InputItemsWithStreamingResponse(self._responses.input_items) + + +class AsyncResponsesWithStreamingResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithStreamingResponse: + return AsyncInputItemsWithStreamingResponse(self._responses.input_items) + + +def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven: + if not is_given(tools): + return NOT_GIVEN + + converted_tools: List[ToolParam] = [] + for tool in tools: + if tool["type"] != "function": + converted_tools.append(tool) + continue + + if "function" not in tool: + # standard Responses API case + converted_tools.append(tool) + continue + + function = cast(Any, tool)["function"] # pyright: ignore[reportUnnecessaryCast] + if not isinstance(function, PydanticFunctionTool): + raise Exception( + "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`" + ) + + assert "parameters" in function + new_tool = ResponsesPydanticFunctionTool( + { + "type": "function", + "name": function["name"], + "description": 
function.get("description"), + "parameters": function["parameters"], + "strict": function.get("strict") or False, + }, + function.model, + ) + + converted_tools.append(new_tool.cast()) + + return converted_tools diff --git a/src/openai/resources/uploads/__init__.py b/src/openai/resources/uploads/__init__.py new file mode 100644 index 0000000000..12d1056f9e --- /dev/null +++ b/src/openai/resources/uploads/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) + +__all__ = [ + "Parts", + "AsyncParts", + "PartsWithRawResponse", + "AsyncPartsWithRawResponse", + "PartsWithStreamingResponse", + "AsyncPartsWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", +] diff --git a/src/openai/resources/uploads/parts.py b/src/openai/resources/uploads/parts.py new file mode 100644 index 0000000000..a32f4eb1d2 --- /dev/null +++ b/src/openai/resources/uploads/parts.py @@ -0,0 +1,205 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.uploads import part_create_params +from ...types.uploads.upload_part import UploadPart + +__all__ = ["Parts", "AsyncParts"] + + +class Parts(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return PartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return PartsWithStreamingResponse(self) + + def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + f"/uploads/{upload_id}/parts", + body=maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class AsyncParts(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncPartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncPartsWithStreamingResponse(self) + + async def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. 
You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + f"/uploads/{upload_id}/parts", + body=await async_maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class PartsWithRawResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = _legacy_response.to_raw_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithRawResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = _legacy_response.async_to_raw_response_wrapper( + parts.create, + ) + + +class PartsWithStreamingResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = to_streamed_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithStreamingResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = async_to_streamed_response_wrapper( + parts.create, + ) diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py new file mode 100644 index 0000000000..ecfcee4800 --- /dev/null +++ b/src/openai/resources/uploads/uploads.py @@ -0,0 +1,711 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import io +import os +import logging +import builtins +from typing import List, overload +from pathlib import Path + +import anyio +import httpx + +from ... 
import _legacy_response +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from ...types import FilePurpose, upload_create_params, upload_complete_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.upload import Upload +from ...types.file_purpose import FilePurpose + +__all__ = ["Uploads", "AsyncUploads"] + + +# 64MB +DEFAULT_PART_SIZE = 64 * 1024 * 1024 + +log: logging.Logger = logging.getLogger(__name__) + + +class Uploads(SyncAPIResource): + @cached_property + def parts(self) -> Parts: + return Parts(self._client) + + @cached_property + def with_raw_response(self) -> UploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return UploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return UploadsWithStreamingResponse(self) + + @overload + def upload_file_chunked( + self, + *, + file: os.PathLike[str], + mime_type: str, + purpose: FilePurpose, + bytes: int | None = None, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits a file into multiple 64MB parts and uploads them sequentially.""" + + @overload + def upload_file_chunked( + self, + *, + file: bytes, + filename: str, + bytes: int, + mime_type: str, + purpose: FilePurpose, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits an in-memory file into multiple 64MB parts and uploads them sequentially.""" + + def upload_file_chunked( + self, + *, + file: os.PathLike[str] | bytes, + mime_type: str, + purpose: FilePurpose, + filename: str | None = None, + bytes: int | None = None, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits the given file into multiple parts and uploads them sequentially. 
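In addition to the path-based example below, the in-memory `bytes` overload can be used as well; a minimal sketch (the data, filename, MIME type, and purpose are illustrative and assume a configured `client`):

```py
data = b"example file contents"

client.uploads.upload_file_chunked(
    file=data,
    filename="example.txt",
    bytes=len(data),
    mime_type="text/plain",
    purpose="assistants",
)
```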
+ + ```py + from pathlib import Path + + client.uploads.upload_file_chunked( + file=Path("my-paper.pdf"), + mime_type="application/pdf", + purpose="assistants", + ) + ``` + """ + if isinstance(file, builtins.bytes): + if filename is None: + raise TypeError("The `filename` argument must be given for in-memory files") + + if bytes is None: + raise TypeError("The `bytes` argument must be given for in-memory files") + else: + if not isinstance(file, Path): + file = Path(file) + + if not filename: + filename = file.name + + if bytes is None: + bytes = file.stat().st_size + + upload = self.create( + bytes=bytes, + filename=filename, + mime_type=mime_type, + purpose=purpose, + ) + + part_ids: list[str] = [] + + if part_size is None: + part_size = DEFAULT_PART_SIZE + + if isinstance(file, builtins.bytes): + buf: io.FileIO | io.BytesIO = io.BytesIO(file) + else: + buf = io.FileIO(file) + + try: + while True: + data = buf.read(part_size) + if not data: + # EOF + break + + part = self.parts.create(upload_id=upload.id, data=data) + log.info("Uploaded part %s for upload %s", part.id, upload.id) + part_ids.append(part.id) + except Exception: + buf.close() + raise + + return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) + + def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to the documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/uploads", + body=maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def complete( + self, + upload_id: str, + *, + part_ids: List[str], + md5: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. 
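For illustration, the manual flow that `upload_file_chunked` automates can be sketched roughly as follows (the file name, MIME type, purpose, and chunk size are placeholders, and the md5 is assumed to be a hex digest):

```py
import hashlib
from pathlib import Path

path = Path("training_examples.jsonl")
data = path.read_bytes()

upload = client.uploads.create(
    bytes=len(data),
    filename=path.name,
    mime_type="text/jsonl",
    purpose="fine-tune",
)

part_ids: list[str] = []
chunk_size = 64 * 1024 * 1024  # stay within the 64 MB per-Part limit
for offset in range(0, len(data), chunk_size):
    part = client.uploads.parts.create(upload_id=upload.id, data=data[offset : offset + chunk_size])
    part_ids.append(part.id)

completed = client.uploads.complete(
    upload_id=upload.id,
    part_ids=part_ids,  # the order of IDs decides how the Parts are assembled
    md5=hashlib.md5(data).hexdigest(),
)
print(completed.status)
```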
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/complete", + body=maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class AsyncUploads(AsyncAPIResource): + @cached_property + def parts(self) -> AsyncParts: + return AsyncParts(self._client) + + @cached_property + def with_raw_response(self) -> AsyncUploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncUploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncUploadsWithStreamingResponse(self) + + @overload + async def upload_file_chunked( + self, + *, + file: os.PathLike[str], + mime_type: str, + purpose: FilePurpose, + bytes: int | None = None, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits a file into multiple 64MB parts and uploads them sequentially.""" + + @overload + async def upload_file_chunked( + self, + *, + file: bytes, + filename: str, + bytes: int, + mime_type: str, + purpose: FilePurpose, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits an in-memory file into multiple 64MB parts and uploads them sequentially.""" + + async def upload_file_chunked( + self, + *, + file: os.PathLike[str] | bytes, + mime_type: str, + purpose: FilePurpose, + filename: str | None = None, + bytes: int | None = None, + part_size: int | None = None, + md5: str | NotGiven = NOT_GIVEN, + ) -> Upload: + """Splits the given file into multiple parts and uploads them sequentially. 
+ + ```py + from pathlib import Path + + await client.uploads.upload_file_chunked( + file=Path("my-paper.pdf"), + mime_type="application/pdf", + purpose="assistants", + ) + ``` + """ + if isinstance(file, builtins.bytes): + if filename is None: + raise TypeError("The `filename` argument must be given for in-memory files") + + if bytes is None: + raise TypeError("The `bytes` argument must be given for in-memory files") + else: + if not isinstance(file, anyio.Path): + file = anyio.Path(file) + + if not filename: + filename = file.name + + if bytes is None: + stat = await file.stat() + bytes = stat.st_size + + upload = await self.create( + bytes=bytes, + filename=filename, + mime_type=mime_type, + purpose=purpose, + ) + + part_ids: list[str] = [] + + if part_size is None: + part_size = DEFAULT_PART_SIZE + + if isinstance(file, anyio.Path): + fd = await file.open("rb") + async with fd: + while True: + data = await fd.read(part_size) + if not data: + # EOF + break + + part = await self.parts.create(upload_id=upload.id, data=data) + log.info("Uploaded part %s for upload %s", part.id, upload.id) + part_ids.append(part.id) + else: + buf = io.BytesIO(file) + + try: + while True: + data = buf.read(part_size) + if not data: + # EOF + break + + part = await self.parts.create(upload_id=upload.id, data=data) + log.info("Uploaded part %s for upload %s", part.id, upload.id) + part_ids.append(part.id) + except Exception: + buf.close() + raise + + return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) + + async def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to the documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/uploads", + body=await async_maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def complete( + self, + upload_id: str, + *, + part_ids: List[str], + md5: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/complete", + body=await async_maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class UploadsWithRawResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithRawResponse: + return PartsWithRawResponse(self._uploads.parts) + + +class AsyncUploadsWithRawResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.async_to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.async_to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithRawResponse: + return AsyncPartsWithRawResponse(self._uploads.parts) + + +class UploadsWithStreamingResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithStreamingResponse: + return PartsWithStreamingResponse(self._uploads.parts) + + +class AsyncUploadsWithStreamingResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = async_to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = async_to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = async_to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithStreamingResponse: + return AsyncPartsWithStreamingResponse(self._uploads.parts) diff --git a/src/openai/resources/vector_stores/__init__.py b/src/openai/resources/vector_stores/__init__.py new file mode 100644 index 0000000000..96ae16c302 --- /dev/null +++ b/src/openai/resources/vector_stores/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) + +__all__ = [ + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "FileBatches", + "AsyncFileBatches", + "FileBatchesWithRawResponse", + "AsyncFileBatchesWithRawResponse", + "FileBatchesWithStreamingResponse", + "AsyncFileBatchesWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", +] diff --git a/src/openai/resources/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py new file mode 100644 index 0000000000..4dd4430b71 --- /dev/null +++ b/src/openai/resources/vector_stores/file_batches.py @@ -0,0 +1,797 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import asyncio +from typing import Dict, List, Iterable, Optional +from typing_extensions import Union, Literal +from concurrent.futures import Future, ThreadPoolExecutor, as_completed + +import httpx +import sniffio + +from ... import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import is_given, maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.file_object import FileObject +from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch + +__all__ = ["FileBatches", "AsyncFileBatches"] + + +class FileBatches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FileBatchesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. + + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. 
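As a rough sketch, retrieving a previously created batch might look like this (the IDs are placeholders):

```py
batch = client.vector_stores.file_batches.retrieve(
    "vsfb_abc123",
    vector_store_id="vs_abc123",
)
print(batch.status, batch.file_counts)
```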
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + chunking_strategy=chunking_strategy, + ) + # TODO: don't poll unless necessary?? + return self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. 
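A typical polling flow, as a rough sketch (the vector store and file IDs are placeholders):

```py
batch = client.vector_stores.file_batches.create_and_poll(
    "vs_abc123",
    file_ids=["file-abc123", "file-def456"],
)

print(batch.status)       # e.g. "completed"
print(batch.file_counts)  # inspect for failed files before relying on the batch
```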
+ """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + continue + + return batch + + def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. + """ + results: list[FileObject] = [] + + with ThreadPoolExecutor(max_workers=max_concurrency) as executor: + futures: list[Future[FileObject]] = [ + executor.submit( + self._client.files.create, + file=file, + purpose="assistants", + ) + for file in files + ] + + for future in as_completed(futures): + exc = future.exception() + if exc: + raise exc + + results.append(future.result()) + + batch = self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in results)], + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + return batch + + +class AsyncFileBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFileBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFileBatchesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Create a vector store file batch. 
+ + Args: + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches", + body=await async_maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def retrieve( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """ + Retrieves a vector store file batch. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def cancel( + self, + batch_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Cancel a vector store file batch. + + This attempts to cancel the processing of + files in this batch as soon as possible. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileBatch, + ) + + async def create_and_poll( + self, + vector_store_id: str, + *, + file_ids: List[str], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Create a vector store batch and poll until all files have been processed.""" + batch = await self.create( + vector_store_id=vector_store_id, + file_ids=file_ids, + chunking_strategy=chunking_strategy, + ) + # TODO: don't poll unless necessary?? + return await self.poll( + batch.id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def list_files( + self, + batch_id: str, + *, + vector_store_id: str, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files in a batch. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. 
`asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_batch_list_files_params.FileBatchListFilesParams, + ), + ), + model=VectorStoreFile, + ) + + async def poll( + self, + batch_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Wait for the given file batch to be processed. + + Note: this will return even if one of the files failed to process, you need to + check batch.file_counts.failed_count to handle this case. + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + batch_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + batch = response.parse() + if batch.file_counts.in_progress > 0: + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + continue + + return batch + + async def upload_and_poll( + self, + vector_store_id: str, + *, + files: Iterable[FileTypes], + max_concurrency: int = 5, + file_ids: List[str] = [], + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileBatch: + """Uploads the given files concurrently and then creates a vector store file batch. + + If you've already uploaded certain files that you want to include in this batch + then you can pass their IDs through the `file_ids` argument. + + By default, if any file upload fails then an exception will be eagerly raised. + + The number of concurrency uploads is configurable using the `max_concurrency` + parameter. + + Note: this method only supports `asyncio` or `trio` as the backing async + runtime. + """ + uploaded_files: list[FileObject] = [] + + async_library = sniffio.current_async_library() + + if async_library == "asyncio": + + async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None: + async with semaphore: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + semaphore = asyncio.Semaphore(max_concurrency) + + tasks = [asyncio_upload_file(semaphore, file) for file in files] + + await asyncio.gather(*tasks) + elif async_library == "trio": + # We only import if the library is being used. 
+ # We support Python 3.7 so are using an older version of trio that does not have type information + import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] + + async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None: + async with limiter: + file_obj = await self._client.files.create( + file=file, + purpose="assistants", + ) + uploaded_files.append(file_obj) + + limiter = trio.CapacityLimiter(max_concurrency) + + async with trio.open_nursery() as nursery: + for file in files: + nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType] + else: + raise RuntimeError( + f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported", + ) + + batch = await self.create_and_poll( + vector_store_id=vector_store_id, + file_ids=[*file_ids, *(f.id for f in uploaded_files)], + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + return batch + + +class FileBatchesWithRawResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.to_raw_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithRawResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + file_batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + file_batches.retrieve, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + file_batches.cancel, + ) + self.list_files = _legacy_response.async_to_raw_response_wrapper( + file_batches.list_files, + ) + + +class FileBatchesWithStreamingResponse: + def __init__(self, file_batches: FileBatches) -> None: + self._file_batches = file_batches + + self.create = to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = to_streamed_response_wrapper( + file_batches.list_files, + ) + + +class AsyncFileBatchesWithStreamingResponse: + def __init__(self, file_batches: AsyncFileBatches) -> None: + self._file_batches = file_batches + + self.create = async_to_streamed_response_wrapper( + file_batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + file_batches.retrieve, + ) + self.cancel = async_to_streamed_response_wrapper( + file_batches.cancel, + ) + self.list_files = async_to_streamed_response_wrapper( + file_batches.list_files, + ) diff --git a/src/openai/resources/vector_stores/files.py b/src/openai/resources/vector_stores/files.py new file mode 100644 index 0000000000..f860384629 --- /dev/null +++ b/src/openai/resources/vector_stores/files.py @@ -0,0 +1,929 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, Union, Optional +from typing_extensions import Literal, assert_never + +import httpx + +from ... 
import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import is_given, maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_list_params, file_create_params, file_update_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.file_content_response import FileContentResponse +from ...types.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FilesWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
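For illustration, attaching an existing File with attributes and a static chunking strategy might look like this (the IDs, attribute values, and chunk sizes are placeholders):

```py
vector_store_file = client.vector_stores.files.create(
    "vs_abc123",
    file_id="file-abc123",
    attributes={"author": "Jane Doe", "year": 2024, "draft": False},
    chunking_strategy={
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 800,
            "chunk_overlap_tokens": 400,
        },
    },
)
print(vector_store_file.status)
```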
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files", + body=maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. 
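A minimal sketch of updating attributes on an attached file (IDs and attribute values are placeholders):

```py
updated = client.vector_stores.files.update(
    "file-abc123",
    vector_store_id="vs_abc123",
    attributes={"reviewed": True, "owner": "docs-team"},
)
print(updated.attributes)
```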
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStoreFile]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=SyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileDeleted, + ) + + def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) + + return self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. 
+ + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). + """ + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy) + + def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = self._client.files.create(file=file, purpose="assistants") + return self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + chunking_strategy=chunking_strategy, + poll_interval_ms=poll_interval_ms, + ) + + def content( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[FileContentResponse]: + """ + Retrieve the parsed contents of a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=SyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, + ) + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Create a vector store file by attaching a + [File](https://platform.openai.com/docs/api-reference/files) to a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + + Args: + file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files", + body=await async_maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + async def retrieve( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Retrieves a vector store file. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + async def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=await async_maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + def list( + self, + vector_store_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: + """ + Returns a list of vector store files. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files", + page=AsyncCursorPage[VectorStoreFile], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "filter": filter, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=VectorStoreFile, + ) + + async def delete( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFileDeleted: + """Delete a vector store file. + + This will remove the file from the vector store but + the file itself will not be deleted. To delete the file, use the + [delete file](https://platform.openai.com/docs/api-reference/files/delete) + endpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFileDeleted, + ) + + async def create_and_poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Attach a file to the given vector store and wait for it to be processed.""" + await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) + + return await self.poll( + file_id, + vector_store_id=vector_store_id, + poll_interval_ms=poll_interval_ms, + ) + + async def poll( + self, + file_id: str, + *, + vector_store_id: str, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Wait for the vector store file to finish processing. 
+ + Note: this will return even if the file failed to process, you need to check + file.last_error and file.status to handle these cases + """ + headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} + if is_given(poll_interval_ms): + headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + + while True: + response = await self.with_raw_response.retrieve( + file_id, + vector_store_id=vector_store_id, + extra_headers=headers, + ) + + file = response.parse() + if file.status == "in_progress": + if not is_given(poll_interval_ms): + from_header = response.headers.get("openai-poll-after-ms") + if from_header is not None: + poll_interval_ms = int(from_header) + else: + poll_interval_ms = 1000 + + await self._sleep(poll_interval_ms / 1000) + elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": + return file + else: + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(file.status) + else: + return file + + async def upload( + self, + *, + vector_store_id: str, + file: FileTypes, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Upload a file to the `files` API and then attach it to the given vector store. + + Note the file will be asynchronously processed (you can use the alternative + polling helper method to wait for processing to complete). + """ + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create( + vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy + ) + + async def upload_and_poll( + self, + *, + vector_store_id: str, + file: FileTypes, + poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """Add a file to a vector store and poll until processing is complete.""" + file_obj = await self._client.files.create(file=file, purpose="assistants") + return await self.create_and_poll( + vector_store_id=vector_store_id, + file_id=file_obj.id, + poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, + ) + + def content( + self, + file_id: str, + *, + vector_store_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FileContentResponse, AsyncPage[FileContentResponse]]: + """ + Retrieve the parsed contents of a vector store file. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=AsyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + files.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + files.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + files.delete, + ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.update = to_streamed_response_wrapper( + files.update, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + self.content = to_streamed_response_wrapper( + files.content, + ) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + files.update, + ) + self.list = async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) + self.content = async_to_streamed_response_wrapper( + files.content, + ) diff --git a/src/openai/resources/vector_stores/vector_stores.py b/src/openai/resources/vector_stores/vector_stores.py new file mode 100644 index 0000000000..9fc17b183b --- /dev/null +++ b/src/openai/resources/vector_stores/vector_stores.py @@ -0,0 +1,865 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
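As a usage sketch only (illustrative, not part of the patch), the file helpers above could be exercised roughly as follows. It assumes the client exposes these resources at `client.vector_stores.files`, as this module layout suggests, and that an API key is configured; the vector store ID and filename are placeholders.

    from openai import OpenAI

    client = OpenAI()  # assumption: OPENAI_API_KEY is set in the environment

    # upload_and_poll() uploads the file with purpose="assistants", attaches it to
    # the vector store, and polls until processing reaches a terminal status.
    vs_file = client.vector_stores.files.upload_and_poll(
        vector_store_id="vs_abc123",   # placeholder vector store ID
        file=open("notes.txt", "rb"),  # placeholder local file
    )

    # The poll helpers return even when processing fails, so check the terminal
    # status and last_error yourself, as the poll() docstring notes.
    if vs_file.status == "failed":
        print(vs_file.last_error)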
+ +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from ...types import ( + FileChunkingStrategyParam, + vector_store_list_params, + vector_store_create_params, + vector_store_search_params, + vector_store_update_params, +) +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from .file_batches import ( + FileBatches, + AsyncFileBatches, + FileBatchesWithRawResponse, + AsyncFileBatchesWithRawResponse, + FileBatchesWithStreamingResponse, + AsyncFileBatchesWithStreamingResponse, +) +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_store import VectorStore +from ...types.vector_store_deleted import VectorStoreDeleted +from ...types.shared_params.metadata import Metadata +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_store_search_response import VectorStoreSearchResponse + +__all__ = ["VectorStores", "AsyncVectorStores"] + + +class VectorStores(SyncAPIResource): + @cached_property + def files(self) -> Files: + return Files(self._client) + + @cached_property + def file_batches(self) -> FileBatches: + return FileBatches(self._client) + + @cached_property + def with_raw_response(self) -> VectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return VectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return VectorStoresWithStreamingResponse(self) + + def create( + self, + *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + expires_after: The expiration policy for a vector store. 
+ + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/vector_stores", + body=maybe_transform( + { + "chunking_strategy": chunking_strategy, + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. 
+ + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}", + body=maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[VectorStore]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=SyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[VectorStoreSearchResponse]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=SyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + + +class AsyncVectorStores(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFiles: + return AsyncFiles(self._client) + + @cached_property + def file_batches(self) -> AsyncFileBatches: + return AsyncFileBatches(self._client) + + @cached_property + def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncVectorStoresWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncVectorStoresWithStreamingResponse(self) + + async def create( + self, + *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, + expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, + file_ids: List[str] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Create a vector store. + + Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + expires_after: The expiration policy for a vector store. + + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + name: The name of the vector store. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/vector_stores", + body=await async_maybe_transform( + { + "chunking_strategy": chunking_strategy, + "expires_after": expires_after, + "file_ids": file_ids, + "metadata": metadata, + "name": name, + }, + vector_store_create_params.VectorStoreCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def retrieve( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Retrieves a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + async def update( + self, + vector_store_id: str, + *, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + name: Optional[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStore: + """ + Modifies a vector store. + + Args: + expires_after: The expiration policy for a vector store. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the vector store. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "metadata": metadata, + "name": name, + }, + vector_store_update_params.VectorStoreUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStore, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]: + """Returns a list of vector stores. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + "/vector_stores", + page=AsyncCursorPage[VectorStore], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + vector_store_list_params.VectorStoreListParams, + ), + ), + model=VectorStore, + ) + + async def delete( + self, + vector_store_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreDeleted: + """ + Delete a vector store. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._delete( + f"/vector_stores/{vector_store_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreDeleted, + ) + + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreSearchResponse, AsyncPage[VectorStoreSearchResponse]]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=AsyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + + +class VectorStoresWithRawResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + vector_stores.delete, + ) + self.search = _legacy_response.to_raw_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithRawResponse: + return FileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithRawResponse: + def __init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = _legacy_response.async_to_raw_response_wrapper( + vector_stores.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + vector_stores.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + vector_stores.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + vector_stores.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + vector_stores.delete, + ) + self.search = _legacy_response.async_to_raw_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithRawResponse: + return AsyncFileBatchesWithRawResponse(self._vector_stores.file_batches) + + +class VectorStoresWithStreamingResponse: + def __init__(self, vector_stores: VectorStores) -> None: + self._vector_stores = vector_stores + + self.create = to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = to_streamed_response_wrapper( + vector_stores.delete, + ) + self.search = to_streamed_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> 
FilesWithStreamingResponse: + return FilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> FileBatchesWithStreamingResponse: + return FileBatchesWithStreamingResponse(self._vector_stores.file_batches) + + +class AsyncVectorStoresWithStreamingResponse: + def __init__(self, vector_stores: AsyncVectorStores) -> None: + self._vector_stores = vector_stores + + self.create = async_to_streamed_response_wrapper( + vector_stores.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + vector_stores.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + vector_stores.update, + ) + self.list = async_to_streamed_response_wrapper( + vector_stores.list, + ) + self.delete = async_to_streamed_response_wrapper( + vector_stores.delete, + ) + self.search = async_to_streamed_response_wrapper( + vector_stores.search, + ) + + @cached_property + def files(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self._vector_stores.files) + + @cached_property + def file_batches(self) -> AsyncFileBatchesWithStreamingResponse: + return AsyncFileBatchesWithStreamingResponse(self._vector_stores.file_batches) diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py index 1b4fca26ee..bf5493fd62 100644 --- a/src/openai/types/__init__.py +++ b/src/openai/types/__init__.py @@ -1,45 +1,90 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from .edit import Edit as Edit +from .batch import Batch as Batch from .image import Image as Image from .model import Model as Model -from .shared import FunctionDefinition as FunctionDefinition -from .shared import FunctionParameters as FunctionParameters +from .shared import ( + Metadata as Metadata, + AllModels as AllModels, + ChatModel as ChatModel, + Reasoning as Reasoning, + ErrorObject as ErrorObject, + CompoundFilter as CompoundFilter, + ResponsesModel as ResponsesModel, + ReasoningEffort as ReasoningEffort, + ComparisonFilter as ComparisonFilter, + FunctionDefinition as FunctionDefinition, + FunctionParameters as FunctionParameters, + ResponseFormatText as ResponseFormatText, + ResponseFormatJSONObject as ResponseFormatJSONObject, + ResponseFormatJSONSchema as ResponseFormatJSONSchema, +) +from .upload import Upload as Upload from .embedding import Embedding as Embedding -from .fine_tune import FineTune as FineTune +from .chat_model import ChatModel as ChatModel from .completion import Completion as Completion from .moderation import Moderation as Moderation +from .audio_model import AudioModel as AudioModel +from .batch_error import BatchError as BatchError from .file_object import FileObject as FileObject +from .image_model import ImageModel as ImageModel from .file_content import FileContent as FileContent from .file_deleted import FileDeleted as FileDeleted +from .file_purpose import FilePurpose as FilePurpose +from .vector_store import VectorStore as VectorStore from .model_deleted import ModelDeleted as ModelDeleted -from .fine_tune_event import FineTuneEvent as FineTuneEvent +from .embedding_model import EmbeddingModel as EmbeddingModel from .images_response import ImagesResponse as ImagesResponse from .completion_usage import CompletionUsage as CompletionUsage +from .eval_list_params import EvalListParams as EvalListParams from .file_list_params import FileListParams as FileListParams +from .moderation_model import ModerationModel 
as ModerationModel +from .batch_list_params import BatchListParams as BatchListParams from .completion_choice import CompletionChoice as CompletionChoice from .image_edit_params import ImageEditParams as ImageEditParams -from .edit_create_params import EditCreateParams as EditCreateParams +from .eval_create_params import EvalCreateParams as EvalCreateParams +from .eval_list_response import EvalListResponse as EvalListResponse +from .eval_update_params import EvalUpdateParams as EvalUpdateParams from .file_create_params import FileCreateParams as FileCreateParams +from .batch_create_params import BatchCreateParams as BatchCreateParams +from .batch_request_counts import BatchRequestCounts as BatchRequestCounts +from .eval_create_response import EvalCreateResponse as EvalCreateResponse +from .eval_delete_response import EvalDeleteResponse as EvalDeleteResponse +from .eval_update_response import EvalUpdateResponse as EvalUpdateResponse +from .upload_create_params import UploadCreateParams as UploadCreateParams +from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted +from .audio_response_format import AudioResponseFormat as AudioResponseFormat from .image_generate_params import ImageGenerateParams as ImageGenerateParams +from .eval_retrieve_response import EvalRetrieveResponse as EvalRetrieveResponse +from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy +from .upload_complete_params import UploadCompleteParams as UploadCompleteParams from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams -from .fine_tune_create_params import FineTuneCreateParams as FineTuneCreateParams from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .moderation_create_params import ModerationCreateParams as ModerationCreateParams -from .create_embedding_response import ( - CreateEmbeddingResponse as CreateEmbeddingResponse, -) -from .moderation_create_response import ( - ModerationCreateResponse as ModerationCreateResponse, -) -from .fine_tune_list_events_params import ( - FineTuneListEventsParams as FineTuneListEventsParams, -) -from .image_create_variation_params import ( - ImageCreateVariationParams as ImageCreateVariationParams, +from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams +from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse +from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse +from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams +from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams +from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams +from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam +from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam +from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse +from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions +from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams +from .static_file_chunking_strategy import StaticFileChunkingStrategy as StaticFileChunkingStrategy +from .eval_custom_data_source_config import EvalCustomDataSourceConfig as EvalCustomDataSourceConfig +from .moderation_image_url_input_param import 
ModerationImageURLInputParam as ModerationImageURLInputParam +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam +from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject +from .eval_stored_completions_data_source_config import ( + EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig, ) -from .fine_tune_events_list_response import ( - FineTuneEventsListResponse as FineTuneEventsListResponse, +from .static_file_chunking_strategy_object_param import ( + StaticFileChunkingStrategyObjectParam as StaticFileChunkingStrategyObjectParam, ) diff --git a/src/openai/types/audio/__init__.py b/src/openai/types/audio/__init__.py index 83afa060f8..396944ee47 100644 --- a/src/openai/types/audio/__init__.py +++ b/src/openai/types/audio/__init__.py @@ -1,13 +1,20 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from .translation import Translation as Translation +from .speech_model import SpeechModel as SpeechModel from .transcription import Transcription as Transcription +from .transcription_word import TranscriptionWord as TranscriptionWord +from .translation_verbose import TranslationVerbose as TranslationVerbose from .speech_create_params import SpeechCreateParams as SpeechCreateParams -from .translation_create_params import ( - TranslationCreateParams as TranslationCreateParams, -) -from .transcription_create_params import ( - TranscriptionCreateParams as TranscriptionCreateParams, -) +from .transcription_include import TranscriptionInclude as TranscriptionInclude +from .transcription_segment import TranscriptionSegment as TranscriptionSegment +from .transcription_verbose import TranscriptionVerbose as TranscriptionVerbose +from .translation_create_params import TranslationCreateParams as TranslationCreateParams +from .transcription_stream_event import TranscriptionStreamEvent as TranscriptionStreamEvent +from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams +from .translation_create_response import TranslationCreateResponse as TranslationCreateResponse +from .transcription_create_response import TranscriptionCreateResponse as TranscriptionCreateResponse +from .transcription_text_done_event import TranscriptionTextDoneEvent as TranscriptionTextDoneEvent +from .transcription_text_delta_event import TranscriptionTextDeltaEvent as TranscriptionTextDeltaEvent diff --git a/src/openai/types/audio/speech_create_params.py b/src/openai/types/audio/speech_create_params.py index 06bea01746..905ca5c3a8 100644 --- a/src/openai/types/audio/speech_create_params.py +++ b/src/openai/types/audio/speech_create_params.py @@ -1,10 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations from typing import Union from typing_extensions import Literal, Required, TypedDict +from .speech_model import SpeechModel + __all__ = ["SpeechCreateParams"] @@ -12,23 +14,40 @@ class SpeechCreateParams(TypedDict, total=False): input: Required[str] """The text to generate audio for. The maximum length is 4096 characters.""" - model: Required[Union[str, Literal["tts-1", "tts-1-hd"]]] + model: Required[Union[str, SpeechModel]] """ - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. """ - voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] + voice: Required[ + Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + ] """The voice to use when generating the audio. - Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. + Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, + `nova`, `sage`, `shimmer`, and `verse`. Previews of the voices are available in + the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + """ + + instructions: str + """Control the voice of your generated audio with additional instructions. + + Does not work with `tts-1` or `tts-1-hd`. """ - response_format: Literal["mp3", "opus", "aac", "flac"] - """The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.""" + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] + """The format to audio in. + + Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. + """ speed: float """The speed of the generated audio. - Select a value from `0.25` to `4.0`. `1.0` is the default. + Select a value from `0.25` to `4.0`. `1.0` is the default. Does not work with + `gpt-4o-mini-tts`. """ diff --git a/src/openai/types/audio/speech_model.py b/src/openai/types/audio/speech_model.py new file mode 100644 index 0000000000..f004f805da --- /dev/null +++ b/src/openai/types/audio/speech_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["SpeechModel"] + +SpeechModel: TypeAlias = Literal["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] diff --git a/src/openai/types/audio/transcription.py b/src/openai/types/audio/transcription.py index d2274faa0e..1576385404 100644 --- a/src/openai/types/audio/transcription.py +++ b/src/openai/types/audio/transcription.py @@ -1,9 +1,30 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional from ..._models import BaseModel -__all__ = ["Transcription"] +__all__ = ["Transcription", "Logprob"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token in the transcription.""" + + bytes: Optional[List[float]] = None + """The bytes of the token.""" + + logprob: Optional[float] = None + """The log probability of the token.""" class Transcription(BaseModel): text: str + """The transcribed text.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the tokens in the transcription. 
+ + Only returned with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` + if `logprobs` is added to the `include` array. + """ diff --git a/src/openai/types/audio/transcription_create_params.py b/src/openai/types/audio/transcription_create_params.py index 7bd70d7b48..0cda4c7907 100644 --- a/src/openai/types/audio/transcription_create_params.py +++ b/src/openai/types/audio/transcription_create_params.py @@ -1,45 +1,66 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union +from typing import List, Union, Optional from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes +from ..audio_model import AudioModel +from .transcription_include import TranscriptionInclude +from ..audio_response_format import AudioResponseFormat -__all__ = ["TranscriptionCreateParams"] +__all__ = [ + "TranscriptionCreateParamsBase", + "TranscriptionCreateParamsNonStreaming", + "TranscriptionCreateParamsStreaming", +] -class TranscriptionCreateParams(TypedDict, total=False): +class TranscriptionCreateParamsBase(TypedDict, total=False): file: Required[FileTypes] """ The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] - """ID of the model to use. Only `whisper-1` is currently available.""" + model: Required[Union[str, AudioModel]] + """ID of the model to use. + + The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1` + (which is powered by our open source Whisper V2 model). + """ + + include: List[TranscriptionInclude] + """Additional information to include in the transcription response. + + `logprobs` will return the log probabilities of the tokens in the response to + understand the model's confidence in the transcription. `logprobs` only works + with response_format set to `json` and only with the models `gpt-4o-transcribe` + and `gpt-4o-mini-transcribe`. + """ language: str """The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. """ prompt: str """An optional text to guide the model's style or continue a previous audio segment. - The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. """ - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] + response_format: AudioResponseFormat """ - The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, + the only supported format is `json`. """ temperature: float @@ -50,3 +71,43 @@ class TranscriptionCreateParams(TypedDict, total=False): [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. """ + + timestamp_granularities: List[Literal["word", "segment"]] + """The timestamp granularities to populate for this transcription. 
+ + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. + """ + + +class TranscriptionCreateParamsNonStreaming(TranscriptionCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + """ + + +class TranscriptionCreateParamsStreaming(TranscriptionCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + """ + + +TranscriptionCreateParams = Union[TranscriptionCreateParamsNonStreaming, TranscriptionCreateParamsStreaming] diff --git a/src/openai/types/audio/transcription_create_response.py b/src/openai/types/audio/transcription_create_response.py new file mode 100644 index 0000000000..2f7bed8114 --- /dev/null +++ b/src/openai/types/audio/transcription_create_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .transcription import Transcription +from .transcription_verbose import TranscriptionVerbose + +__all__ = ["TranscriptionCreateResponse"] + +TranscriptionCreateResponse: TypeAlias = Union[Transcription, TranscriptionVerbose] diff --git a/src/openai/types/audio/transcription_include.py b/src/openai/types/audio/transcription_include.py new file mode 100644 index 0000000000..0e464ac934 --- /dev/null +++ b/src/openai/types/audio/transcription_include.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["TranscriptionInclude"] + +TranscriptionInclude: TypeAlias = Literal["logprobs"] diff --git a/src/openai/types/audio/transcription_segment.py b/src/openai/types/audio/transcription_segment.py new file mode 100644 index 0000000000..522c401ebb --- /dev/null +++ b/src/openai/types/audio/transcription_segment.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel + +__all__ = ["TranscriptionSegment"] + + +class TranscriptionSegment(BaseModel): + id: int + """Unique identifier of the segment.""" + + avg_logprob: float + """Average logprob of the segment. + + If the value is lower than -1, consider the logprobs failed. + """ + + compression_ratio: float + """Compression ratio of the segment. 
+ + If the value is greater than 2.4, consider the compression failed. + """ + + end: float + """End time of the segment in seconds.""" + + no_speech_prob: float + """Probability of no speech in the segment. + + If the value is higher than 1.0 and the `avg_logprob` is below -1, consider this + segment silent. + """ + + seek: int + """Seek offset of the segment.""" + + start: float + """Start time of the segment in seconds.""" + + temperature: float + """Temperature parameter used for generating the segment.""" + + text: str + """Text content of the segment.""" + + tokens: List[int] + """Array of token IDs for the text content.""" diff --git a/src/openai/types/audio/transcription_stream_event.py b/src/openai/types/audio/transcription_stream_event.py new file mode 100644 index 0000000000..757077a280 --- /dev/null +++ b/src/openai/types/audio/transcription_stream_event.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .transcription_text_done_event import TranscriptionTextDoneEvent +from .transcription_text_delta_event import TranscriptionTextDeltaEvent + +__all__ = ["TranscriptionStreamEvent"] + +TranscriptionStreamEvent: TypeAlias = Annotated[ + Union[TranscriptionTextDeltaEvent, TranscriptionTextDoneEvent], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/audio/transcription_text_delta_event.py b/src/openai/types/audio/transcription_text_delta_event.py new file mode 100644 index 0000000000..f8d5355491 --- /dev/null +++ b/src/openai/types/audio/transcription_text_delta_event.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionTextDeltaEvent", "Logprob"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token that was used to generate the log probability.""" + + bytes: Optional[List[object]] = None + """The bytes that were used to generate the log probability.""" + + logprob: Optional[float] = None + """The log probability of the token.""" + + +class TranscriptionTextDeltaEvent(BaseModel): + delta: str + """The text delta that was additionally transcribed.""" + + type: Literal["transcript.text.delta"] + """The type of the event. Always `transcript.text.delta`.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the delta. + + Only included if you + [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + with the `include[]` parameter set to `logprobs`. + """ diff --git a/src/openai/types/audio/transcription_text_done_event.py b/src/openai/types/audio/transcription_text_done_event.py new file mode 100644 index 0000000000..3f1a713a52 --- /dev/null +++ b/src/openai/types/audio/transcription_text_done_event.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
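
The streaming transcription parameters and delta events above suggest usage along these lines. This is a minimal sketch, assuming the usual `OpenAI()` client (with `OPENAI_API_KEY` set in the environment) routes `TranscriptionCreateParamsStreaming` through `client.audio.transcriptions.create`; `speech.mp3` is a placeholder file.

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

with open("speech.mp3", "rb") as audio_file:  # placeholder input file
    stream = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",   # whisper-1 ignores streaming per the docstring above
        file=audio_file,
        response_format="json",      # logprobs only work with the json format
        include=["logprobs"],        # only supported by the gpt-4o transcribe models
        stream=True,                 # yields TranscriptionTextDeltaEvent / TranscriptionTextDoneEvent
    )
    for event in stream:
        if event.type == "transcript.text.delta":
            print(event.delta, end="", flush=True)
        elif event.type == "transcript.text.done":
            print()
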
+ +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionTextDoneEvent", "Logprob"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token that was used to generate the log probability.""" + + bytes: Optional[List[object]] = None + """The bytes that were used to generate the log probability.""" + + logprob: Optional[float] = None + """The log probability of the token.""" + + +class TranscriptionTextDoneEvent(BaseModel): + text: str + """The text that was transcribed.""" + + type: Literal["transcript.text.done"] + """The type of the event. Always `transcript.text.done`.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the individual tokens in the transcription. + + Only included if you + [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + with the `include[]` parameter set to `logprobs`. + """ diff --git a/src/openai/types/audio/transcription_verbose.py b/src/openai/types/audio/transcription_verbose.py new file mode 100644 index 0000000000..2a670189e0 --- /dev/null +++ b/src/openai/types/audio/transcription_verbose.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from .transcription_word import TranscriptionWord +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranscriptionVerbose"] + + +class TranscriptionVerbose(BaseModel): + duration: float + """The duration of the input audio.""" + + language: str + """The language of the input audio.""" + + text: str + """The transcribed text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the transcribed text and their corresponding details.""" + + words: Optional[List[TranscriptionWord]] = None + """Extracted words and their corresponding timestamps.""" diff --git a/src/openai/types/audio/transcription_word.py b/src/openai/types/audio/transcription_word.py new file mode 100644 index 0000000000..2ce682f957 --- /dev/null +++ b/src/openai/types/audio/transcription_word.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["TranscriptionWord"] + + +class TranscriptionWord(BaseModel): + end: float + """End time of the word in seconds.""" + + start: float + """Start time of the word in seconds.""" + + word: str + """The text content of the word.""" diff --git a/src/openai/types/audio/translation.py b/src/openai/types/audio/translation.py index a01d622abc..efc56f7f9b 100644 --- a/src/openai/types/audio/translation.py +++ b/src/openai/types/audio/translation.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from ..._models import BaseModel diff --git a/src/openai/types/audio/translation_create_params.py b/src/openai/types/audio/translation_create_params.py index d3cb4b9e63..b23a185375 100644 --- a/src/openai/types/audio/translation_create_params.py +++ b/src/openai/types/audio/translation_create_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
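
For the non-streaming path, a sketch of requesting word and segment timestamps, which per the docstrings above requires `response_format="verbose_json"` (and therefore `whisper-1`) and should return the `TranscriptionVerbose` model; the file path is a placeholder.

from openai import OpenAI

client = OpenAI()

with open("meeting.wav", "rb") as audio_file:  # placeholder input file
    transcript = client.audio.transcriptions.create(
        model="whisper-1",                            # the gpt-4o models only support plain json
        file=audio_file,
        response_format="verbose_json",               # required for timestamp granularities
        timestamp_granularities=["word", "segment"],
    )

# transcript should be a TranscriptionVerbose here
print(transcript.duration, transcript.language)
for word in transcript.words or []:
    print(f"{word.start:.2f}s {word.word}")
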
from __future__ import annotations @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes +from ..audio_model import AudioModel __all__ = ["TranslationCreateParams"] @@ -17,21 +18,25 @@ class TranslationCreateParams(TypedDict, total=False): mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] - """ID of the model to use. Only `whisper-1` is currently available.""" + model: Required[Union[str, AudioModel]] + """ID of the model to use. + + Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + """ prompt: str """An optional text to guide the model's style or continue a previous audio segment. - The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. """ - response_format: str + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] """ - The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. """ temperature: float diff --git a/src/openai/types/audio/translation_create_response.py b/src/openai/types/audio/translation_create_response.py new file mode 100644 index 0000000000..9953813c08 --- /dev/null +++ b/src/openai/types/audio/translation_create_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .translation import Translation +from .translation_verbose import TranslationVerbose + +__all__ = ["TranslationCreateResponse"] + +TranslationCreateResponse: TypeAlias = Union[Translation, TranslationVerbose] diff --git a/src/openai/types/audio/translation_verbose.py b/src/openai/types/audio/translation_verbose.py new file mode 100644 index 0000000000..27cb02d64f --- /dev/null +++ b/src/openai/types/audio/translation_verbose.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranslationVerbose"] + + +class TranslationVerbose(BaseModel): + duration: float + """The duration of the input audio.""" + + language: str + """The language of the output translation (always `english`).""" + + text: str + """The translated text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the translated text and their corresponding details.""" diff --git a/src/openai/types/audio_model.py b/src/openai/types/audio_model.py new file mode 100644 index 0000000000..4d14d60181 --- /dev/null +++ b/src/openai/types/audio_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioModel"] + +AudioModel: TypeAlias = Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"] diff --git a/src/openai/types/audio_response_format.py b/src/openai/types/audio_response_format.py new file mode 100644 index 0000000000..f8c8d45945 --- /dev/null +++ b/src/openai/types/audio_response_format.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
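
A short sketch of the translation parameters just added, assuming the standard `client.audio.translations.create` entry point; `interview.mp3` is a placeholder, and `verbose_json` should produce the `TranslationVerbose` branch of `TranslationCreateResponse`.

from openai import OpenAI

client = OpenAI()

with open("interview.mp3", "rb") as audio_file:  # placeholder input file
    translation = client.audio.translations.create(
        model="whisper-1",               # the only model listed as available
        file=audio_file,
        response_format="verbose_json",  # plain "json" returns the simpler Translation model
        temperature=0.2,
    )

print(translation.text)
for segment in translation.segments or []:
    print(f"[{segment.start:.1f}-{segment.end:.1f}] {segment.text}")
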
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioResponseFormat"] + +AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"] diff --git a/src/openai/types/auto_file_chunking_strategy_param.py b/src/openai/types/auto_file_chunking_strategy_param.py new file mode 100644 index 0000000000..6f17836bac --- /dev/null +++ b/src/openai/types/auto_file_chunking_strategy_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["AutoFileChunkingStrategyParam"] + + +class AutoFileChunkingStrategyParam(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py new file mode 100644 index 0000000000..35de90ac85 --- /dev/null +++ b/src/openai/types/batch.py @@ -0,0 +1,87 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .batch_error import BatchError +from .shared.metadata import Metadata +from .batch_request_counts import BatchRequestCounts + +__all__ = ["Batch", "Errors"] + + +class Errors(BaseModel): + data: Optional[List[BatchError]] = None + + object: Optional[str] = None + """The object type, which is always `list`.""" + + +class Batch(BaseModel): + id: str + + completion_window: str + """The time frame within which the batch should be processed.""" + + created_at: int + """The Unix timestamp (in seconds) for when the batch was created.""" + + endpoint: str + """The OpenAI API endpoint used by the batch.""" + + input_file_id: str + """The ID of the input file for the batch.""" + + object: Literal["batch"] + """The object type, which is always `batch`.""" + + status: Literal[ + "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled" + ] + """The current status of the batch.""" + + cancelled_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was cancelled.""" + + cancelling_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started cancelling.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch was completed.""" + + error_file_id: Optional[str] = None + """The ID of the file containing the outputs of requests with errors.""" + + errors: Optional[Errors] = None + + expired_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch expired.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch will expire.""" + + failed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch failed.""" + + finalizing_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started finalizing.""" + + in_progress_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the batch started processing.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + output_file_id: Optional[str] = None + """The ID of the file containing the outputs of successfully executed requests.""" + + request_counts: Optional[BatchRequestCounts] = None + """The request counts for different statuses within the batch.""" diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py new file mode 100644 index 0000000000..cc95afd3ba --- /dev/null +++ b/src/openai/types/batch_create_params.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from .shared_params.metadata import Metadata + +__all__ = ["BatchCreateParams"] + + +class BatchCreateParams(TypedDict, total=False): + completion_window: Required[Literal["24h"]] + """The time frame within which the batch should be processed. + + Currently only `24h` is supported. + """ + + endpoint: Required[Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"]] + """The endpoint to be used for all requests in the batch. + + Currently `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and + `/v1/completions` are supported. Note that `/v1/embeddings` batches are also + restricted to a maximum of 50,000 embedding inputs across all requests in the + batch. + """ + + input_file_id: Required[str] + """The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), + and must be uploaded with the purpose `batch`. The file can contain up to 50,000 + requests, and can be up to 200 MB in size. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/batch_error.py b/src/openai/types/batch_error.py new file mode 100644 index 0000000000..1cdd808dbd --- /dev/null +++ b/src/openai/types/batch_error.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["BatchError"] + + +class BatchError(BaseModel): + code: Optional[str] = None + """An error code identifying the error type.""" + + line: Optional[int] = None + """The line number of the input file where the error occurred, if applicable.""" + + message: Optional[str] = None + """A human-readable message providing more details about the error.""" + + param: Optional[str] = None + """The name of the parameter that caused the error, if applicable.""" diff --git a/src/openai/types/batch_list_params.py b/src/openai/types/batch_list_params.py new file mode 100644 index 0000000000..ef5e966b79 --- /dev/null +++ b/src/openai/types/batch_list_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
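
The `BatchCreateParams` above map onto a request like the following sketch, assuming the client exposes them via `client.batches.create`; `requests.jsonl` is a placeholder for a JSONL file formatted for the Batch API.

from openai import OpenAI

client = OpenAI()

# The input file must be JSONL and uploaded with purpose="batch".
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",           # currently the only supported window
    metadata={"job": "nightly-eval"},  # optional, up to 16 key-value pairs
)

print(batch.id, batch.status)          # a new batch starts in the "validating" status
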
+ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py new file mode 100644 index 0000000000..068b071af1 --- /dev/null +++ b/src/openai/types/batch_request_counts.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["BatchRequestCounts"] + + +class BatchRequestCounts(BaseModel): + completed: int + """Number of requests that have been completed successfully.""" + + failed: int + """Number of requests that have failed.""" + + total: int + """Total number of requests in the batch.""" diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py index c03d823b8c..5ba3eadf3c 100644 --- a/src/openai/types/beta/__init__.py +++ b/src/openai/types/beta/__init__.py @@ -1,16 +1,33 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from .thread import Thread as Thread from .assistant import Assistant as Assistant +from .function_tool import FunctionTool as FunctionTool +from .assistant_tool import AssistantTool as AssistantTool from .thread_deleted import ThreadDeleted as ThreadDeleted +from .file_search_tool import FileSearchTool as FileSearchTool from .assistant_deleted import AssistantDeleted as AssistantDeleted +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .assistant_tool_param import AssistantToolParam as AssistantToolParam from .thread_create_params import ThreadCreateParams as ThreadCreateParams from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams from .assistant_list_params import AssistantListParams as AssistantListParams +from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice +from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool +from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam from .assistant_create_params import AssistantCreateParams as AssistantCreateParams from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams -from .thread_create_and_run_params import ( - ThreadCreateAndRunParams as ThreadCreateAndRunParams, +from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam +from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam +from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption +from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams +from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction +from .assistant_response_format_option import AssistantResponseFormatOption 
as AssistantResponseFormatOption +from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam +from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam +from .assistant_response_format_option_param import ( + AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam, ) diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index a21206765a..58421e0f66 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -1,33 +1,39 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins -from typing import List, Union, Optional +from typing import List, Optional from typing_extensions import Literal -from ..shared import FunctionDefinition from ..._models import BaseModel +from .assistant_tool import AssistantTool +from ..shared.metadata import Metadata +from .assistant_response_format_option import AssistantResponseFormatOption -__all__ = ["Assistant", "Tool", "ToolCodeInterpreter", "ToolRetrieval", "ToolFunction"] +__all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] -class ToolCodeInterpreter(BaseModel): - type: Literal["code_interpreter"] - """The type of tool being defined: `code_interpreter`""" - - -class ToolRetrieval(BaseModel): - type: Literal["retrieval"] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter`` tool. There can be a maximum of 20 files + associated with the tool. + """ -class ToolFunction(BaseModel): - function: FunctionDefinition +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ - type: Literal["function"] - """The type of tool being defined: `function`""" +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None -Tool = Union[ToolCodeInterpreter, ToolRetrieval, ToolFunction] + file_search: Optional[ToolResourcesFileSearch] = None class Assistant(BaseModel): @@ -37,28 +43,23 @@ class Assistant(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the assistant was created.""" - description: Optional[str] + description: Optional[str] = None """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. - """ - - instructions: Optional[str] + instructions: Optional[str] = None """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ - metadata: Optional[builtins.object] + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. 
Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -67,19 +68,67 @@ class Assistant(BaseModel): You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ - name: Optional[str] + name: Optional[str] = None """The name of the assistant. The maximum length is 256 characters.""" object: Literal["assistant"] """The object type, which is always `assistant`.""" - tools: List[Tool] + tools: List[AssistantTool] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. + """ + + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] = None + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
""" diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index 539897a7ba..8b3c331850 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -1,82 +1,212 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ...types import shared_params +from ..shared.chat_model import ChatModel +from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ "AssistantCreateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", ] class AssistantCreateParams(TypedDict, total=False): - model: Required[str] + model: Required[Union[str, ChatModel]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ description: Optional[str] """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. Files are ordered by their creation date in ascending order. - """ - instructions: Optional[str] """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" - tools: List[Tool] + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. 
+ """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" + We generally recommend altering this or temperature but not both. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. 
+ """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this assistant. There can be a maximum of 1 + vector store attached to the assistant. + """ - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/assistant_deleted.py b/src/openai/types/beta/assistant_deleted.py index 23802caaf6..3be40cd6b8 100644 --- a/src/openai/types/beta/assistant_deleted.py +++ b/src/openai/types/beta/assistant_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/assistant_list_params.py b/src/openai/types/beta/assistant_list_params.py index b2d794a43a..834ffbcaf8 100644 --- a/src/openai/types/beta/assistant_list_params.py +++ b/src/openai/types/beta/assistant_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -21,7 +21,7 @@ class AssistantListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. 
For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py new file mode 100644 index 0000000000..6f06a3442f --- /dev/null +++ b/src/openai/types/beta/assistant_response_format_option.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from ..shared.response_format_json_schema import ResponseFormatJSONSchema + +__all__ = ["AssistantResponseFormatOption"] + +AssistantResponseFormatOption: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py new file mode 100644 index 0000000000..5e724a4d98 --- /dev/null +++ b/src/openai/types/beta/assistant_response_format_option_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema + +__all__ = ["AssistantResponseFormatOptionParam"] + +AssistantResponseFormatOptionParam: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py new file mode 100644 index 0000000000..41d3a0c5ea --- /dev/null +++ b/src/openai/types/beta/assistant_stream_event.py @@ -0,0 +1,294 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
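
Taken together with the `AssistantCreateParams` above, the new `tool_resources` and response-format options might be used as in this sketch (Assistants beta); the file ID is a placeholder, and `client.beta.assistants.create` is assumed to accept these typed dicts as plain dictionaries.

from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="filing-helper",
    instructions="Answer questions using the attached documents.",
    tools=[{"type": "file_search"}],
    tool_resources={
        "file_search": {
            "vector_stores": [
                {
                    "file_ids": ["file-abc123"],  # placeholder for an uploaded file ID
                    "chunking_strategy": {
                        "type": "static",
                        "static": {
                            "max_chunk_size_tokens": 800,
                            "chunk_overlap_tokens": 400,
                        },
                    },
                }
            ]
        }
    },
    response_format="auto",  # or one of the JSON response-format objects defined above
)
print(assistant.id)
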
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .thread import Thread +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .threads.run import Run +from .threads.message import Message +from ..shared.error_object import ErrorObject +from .threads.runs.run_step import RunStep +from .threads.message_delta_event import MessageDeltaEvent +from .threads.runs.run_step_delta_event import RunStepDeltaEvent + +__all__ = [ + "AssistantStreamEvent", + "ThreadCreated", + "ThreadRunCreated", + "ThreadRunQueued", + "ThreadRunInProgress", + "ThreadRunRequiresAction", + "ThreadRunCompleted", + "ThreadRunIncomplete", + "ThreadRunFailed", + "ThreadRunCancelling", + "ThreadRunCancelled", + "ThreadRunExpired", + "ThreadRunStepCreated", + "ThreadRunStepInProgress", + "ThreadRunStepDelta", + "ThreadRunStepCompleted", + "ThreadRunStepFailed", + "ThreadRunStepCancelled", + "ThreadRunStepExpired", + "ThreadMessageCreated", + "ThreadMessageInProgress", + "ThreadMessageDelta", + "ThreadMessageCompleted", + "ThreadMessageIncomplete", + "ErrorEvent", +] + + +class ThreadCreated(BaseModel): + data: Thread + """ + Represents a thread that contains + [messages](https://platform.openai.com/docs/api-reference/messages). + """ + + event: Literal["thread.created"] + + enabled: Optional[bool] = None + """Whether to enable input audio transcription.""" + + +class ThreadRunCreated(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.created"] + + +class ThreadRunQueued(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.queued"] + + +class ThreadRunInProgress(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.in_progress"] + + +class ThreadRunRequiresAction(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.requires_action"] + + +class ThreadRunCompleted(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.completed"] + + +class ThreadRunIncomplete(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.incomplete"] + + +class ThreadRunFailed(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.failed"] + + +class ThreadRunCancelling(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelling"] + + +class ThreadRunCancelled(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.cancelled"] + + +class ThreadRunExpired(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). 
+ """ + + event: Literal["thread.run.expired"] + + +class ThreadRunStepCreated(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.created"] + + +class ThreadRunStepInProgress(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.in_progress"] + + +class ThreadRunStepDelta(BaseModel): + data: RunStepDeltaEvent + """Represents a run step delta i.e. + + any changed fields on a run step during streaming. + """ + + event: Literal["thread.run.step.delta"] + + +class ThreadRunStepCompleted(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.completed"] + + +class ThreadRunStepFailed(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.failed"] + + +class ThreadRunStepCancelled(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.cancelled"] + + +class ThreadRunStepExpired(BaseModel): + data: RunStep + """Represents a step in execution of a run.""" + + event: Literal["thread.run.step.expired"] + + +class ThreadMessageCreated(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.created"] + + +class ThreadMessageInProgress(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.in_progress"] + + +class ThreadMessageDelta(BaseModel): + data: MessageDeltaEvent + """Represents a message delta i.e. + + any changed fields on a message during streaming. + """ + + event: Literal["thread.message.delta"] + + +class ThreadMessageCompleted(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.completed"] + + +class ThreadMessageIncomplete(BaseModel): + data: Message + """ + Represents a message within a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.message.incomplete"] + + +class ErrorEvent(BaseModel): + data: ErrorObject + + event: Literal["error"] + + +AssistantStreamEvent: TypeAlias = Annotated[ + Union[ + ThreadCreated, + ThreadRunCreated, + ThreadRunQueued, + ThreadRunInProgress, + ThreadRunRequiresAction, + ThreadRunCompleted, + ThreadRunIncomplete, + ThreadRunFailed, + ThreadRunCancelling, + ThreadRunCancelled, + ThreadRunExpired, + ThreadRunStepCreated, + ThreadRunStepInProgress, + ThreadRunStepDelta, + ThreadRunStepCompleted, + ThreadRunStepFailed, + ThreadRunStepCancelled, + ThreadRunStepExpired, + ThreadMessageCreated, + ThreadMessageInProgress, + ThreadMessageDelta, + ThreadMessageCompleted, + ThreadMessageIncomplete, + ErrorEvent, + ], + PropertyInfo(discriminator="event"), +] diff --git a/src/openai/types/beta/assistant_tool.py b/src/openai/types/beta/assistant_tool.py new file mode 100644 index 0000000000..1bde6858b1 --- /dev/null +++ b/src/openai/types/beta/assistant_tool.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
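The discriminated union above is what the SDK yields when a run is created with stream=True. A minimal sketch of dispatching on the `event` field; the thread and assistant IDs are placeholders and OPENAI_API_KEY is assumed to be set:

from openai import OpenAI

client = OpenAI()

# thread_abc123 / asst_abc123 are placeholders for resources created elsewhere.
stream = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    stream=True,  # yields AssistantStreamEvent objects instead of a single Run
)

for event in stream:
    # `event` is one member of the union above, discriminated on the `event` field.
    if event.event == "thread.message.delta":
        for part in event.data.delta.content or []:
            if part.type == "text" and part.text and part.text.value:
                print(part.text.value, end="", flush=True)
    elif event.event == "thread.run.completed":
        print("\nrun finished with status:", event.data.status)
    elif event.event == "error":
        print("\nstream error:", event.data.message)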
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .function_tool import FunctionTool +from .file_search_tool import FileSearchTool +from .code_interpreter_tool import CodeInterpreterTool + +__all__ = ["AssistantTool"] + +AssistantTool: TypeAlias = Annotated[ + Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py new file mode 100644 index 0000000000..d73439f006 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .assistant_tool_choice_function import AssistantToolChoiceFunction + +__all__ = ["AssistantToolChoice"] + + +class AssistantToolChoice(BaseModel): + type: Literal["function", "code_interpreter", "file_search"] + """The type of the tool. If type is `function`, the function name must be set""" + + function: Optional[AssistantToolChoiceFunction] = None diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py new file mode 100644 index 0000000000..87f38310ca --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_function.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["AssistantToolChoiceFunction"] + + +class AssistantToolChoiceFunction(BaseModel): + name: str + """The name of the function to call.""" diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py new file mode 100644 index 0000000000..428857de91 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_function_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["AssistantToolChoiceFunctionParam"] + + +class AssistantToolChoiceFunctionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py new file mode 100644 index 0000000000..e57c3278fb --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_option.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .assistant_tool_choice import AssistantToolChoice + +__all__ = ["AssistantToolChoiceOption"] + +AssistantToolChoiceOption: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoice] diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py new file mode 100644 index 0000000000..cc0053d37e --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
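The tool-choice types above control which tool, if any, a run is allowed to call. A minimal sketch, assuming OPENAI_API_KEY is set; the IDs and the function name are placeholders:

from openai import OpenAI

client = OpenAI()

# Force the run to call one specific function tool. `tool_choice` accepts any member of
# AssistantToolChoiceOptionParam: "none", "auto", "required", or an object as below.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    tool_choice={
        "type": "function",
        "function": {"name": "get_weather"},
    },
)
print(run.status)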
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .assistant_tool_choice_param import AssistantToolChoiceParam + +__all__ = ["AssistantToolChoiceOptionParam"] + +AssistantToolChoiceOptionParam: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py new file mode 100644 index 0000000000..904f489e26 --- /dev/null +++ b/src/openai/types/beta/assistant_tool_choice_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam + +__all__ = ["AssistantToolChoiceParam"] + + +class AssistantToolChoiceParam(TypedDict, total=False): + type: Required[Literal["function", "code_interpreter", "file_search"]] + """The type of the tool. If type is `function`, the function name must be set""" + + function: AssistantToolChoiceFunctionParam diff --git a/src/openai/types/beta/assistant_tool_param.py b/src/openai/types/beta/assistant_tool_param.py new file mode 100644 index 0000000000..321c4b1ddb --- /dev/null +++ b/src/openai/types/beta/assistant_tool_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .function_tool_param import FunctionToolParam +from .file_search_tool_param import FileSearchToolParam +from .code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["AssistantToolParam"] + +AssistantToolParam: TypeAlias = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index a0efd96ecd..b28094a6a5 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -1,84 +1,177 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, TypedDict -from ...types import shared_params +from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam -__all__ = [ - "AssistantUpdateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", -] +__all__ = ["AssistantUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class AssistantUpdateParams(TypedDict, total=False): description: Optional[str] """The description of the assistant. The maximum length is 512 characters.""" - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs - attached to this assistant. There can be a maximum of 20 files attached to the - assistant. 
Files are ordered by their creation date in ascending order. If a - file was previosuly attached to the list but does not show up in the list, it - will be deleted from the assistant. - """ - instructions: Optional[str] """The system instructions that the assistant uses. - The maximum length is 32768 characters. + The maximum length is 256,000 characters. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ - model: str + model: Union[ + str, + Literal[ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" - tools: List[Tool] + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + tools: Iterable[AssistantToolParam] """A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types - `code_interpreter`, `retrieval`, or `function`. + `code_interpreter`, `file_search`, or `function`. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" + We generally recommend altering this or temperature but not both. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + Overrides the list of + [file](https://platform.openai.com/docs/api-reference/files) IDs made available + to the `code_interpreter` tool. There can be a maximum of 20 files associated + with the tool. + """ -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + Overrides the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/assistants/__init__.py b/src/openai/types/beta/assistants/__init__.py deleted file mode 100644 index 9dbb3e2b8b..0000000000 --- a/src/openai/types/beta/assistants/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -from .assistant_file import AssistantFile as AssistantFile -from .file_list_params import FileListParams as FileListParams -from .file_create_params import FileCreateParams as FileCreateParams -from .file_delete_response import FileDeleteResponse as FileDeleteResponse diff --git a/src/openai/types/beta/assistants/assistant_file.py b/src/openai/types/beta/assistants/assistant_file.py deleted file mode 100644 index 1d1573ac0f..0000000000 --- a/src/openai/types/beta/assistants/assistant_file.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["AssistantFile"] - - -class AssistantFile(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - assistant_id: str - """The assistant ID that the file is attached to.""" - - created_at: int - """The Unix timestamp (in seconds) for when the assistant file was created.""" - - object: Literal["assistant.file"] - """The object type, which is always `assistant.file`.""" diff --git a/src/openai/types/beta/assistants/file_create_params.py b/src/openai/types/beta/assistants/file_create_params.py deleted file mode 100644 index f70f96fc1b..0000000000 --- a/src/openai/types/beta/assistants/file_create_params.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["FileCreateParams"] - - -class FileCreateParams(TypedDict, total=False): - file_id: Required[str] - """ - A [File](https://platform.openai.com/docs/api-reference/files) ID (with - `purpose="assistants"`) that the assistant should use. Useful for tools like - `retrieval` and `code_interpreter` that can access files. - """ diff --git a/src/openai/types/beta/assistants/file_delete_response.py b/src/openai/types/beta/assistants/file_delete_response.py deleted file mode 100644 index 52c138feda..0000000000 --- a/src/openai/types/beta/assistants/file_delete_response.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["FileDeleteResponse"] - - -class FileDeleteResponse(BaseModel): - id: str - - deleted: bool - - object: Literal["assistant.file.deleted"] diff --git a/src/openai/types/beta/chat/__init__.py b/src/openai/types/beta/chat/__init__.py index b2f53e3525..f8ee8b14b1 100644 --- a/src/openai/types/beta/chat/__init__.py +++ b/src/openai/types/beta/chat/__init__.py @@ -1,3 +1,3 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/beta/code_interpreter_tool.py b/src/openai/types/beta/code_interpreter_tool.py new file mode 100644 index 0000000000..17ab3de629 --- /dev/null +++ b/src/openai/types/beta/code_interpreter_tool.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["CodeInterpreterTool"] + + +class CodeInterpreterTool(BaseModel): + type: Literal["code_interpreter"] + """The type of tool being defined: `code_interpreter`""" diff --git a/src/openai/types/beta/code_interpreter_tool_param.py b/src/openai/types/beta/code_interpreter_tool_param.py new file mode 100644 index 0000000000..4f6916d756 --- /dev/null +++ b/src/openai/types/beta/code_interpreter_tool_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["CodeInterpreterToolParam"] + + +class CodeInterpreterToolParam(TypedDict, total=False): + type: Required[Literal["code_interpreter"]] + """The type of tool being defined: `code_interpreter`""" diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py new file mode 100644 index 0000000000..89fc16c04c --- /dev/null +++ b/src/openai/types/beta/file_search_tool.py @@ -0,0 +1,55 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileSearchTool", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(BaseModel): + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + +class FileSearch(BaseModel): + max_num_results: Optional[int] = None + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + +class FileSearchTool(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" + + file_search: Optional[FileSearch] = None + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py new file mode 100644 index 0000000000..c73d0af79d --- /dev/null +++ b/src/openai/types/beta/file_search_tool_param.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileSearchToolParam", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(TypedDict, total=False): + score_threshold: Required[float] + """The score threshold for the file search. 
+ + All values must be a floating point number between 0 and 1. + """ + + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + +class FileSearch(TypedDict, total=False): + max_num_results: int + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: FileSearchRankingOptions + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + file_search: FileSearch + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/function_tool.py b/src/openai/types/beta/function_tool.py new file mode 100644 index 0000000000..f9227678df --- /dev/null +++ b/src/openai/types/beta/function_tool.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.function_definition import FunctionDefinition + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + function: FunctionDefinition + + type: Literal["function"] + """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/function_tool_param.py b/src/openai/types/beta/function_tool_param.py new file mode 100644 index 0000000000..d906e02b88 --- /dev/null +++ b/src/openai/types/beta/function_tool_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ..shared_params.function_definition import FunctionDefinition + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + function: Required[FunctionDefinition] + + type: Required[Literal["function"]] + """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/realtime/__init__.py b/src/openai/types/beta/realtime/__init__.py new file mode 100644 index 0000000000..0374b9b457 --- /dev/null +++ b/src/openai/types/beta/realtime/__init__.py @@ -0,0 +1,96 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
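A minimal sketch of the file-search tool parameters above in use, overriding the result count and ranking options; the vector store ID is a placeholder and OPENAI_API_KEY is assumed to be set:

from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",
    tools=[
        {
            "type": "file_search",
            "file_search": {
                "max_num_results": 10,  # must be between 1 and 50 inclusive
                "ranking_options": {
                    "ranker": "auto",
                    "score_threshold": 0.5,  # floating point number between 0 and 1
                },
            },
        }
    ],
    tool_resources={"file_search": {"vector_store_ids": ["vs_abc123"]}},  # placeholder ID
)
print(assistant.id)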
+ +from __future__ import annotations + +from .session import Session as Session +from .error_event import ErrorEvent as ErrorEvent +from .conversation_item import ConversationItem as ConversationItem +from .realtime_response import RealtimeResponse as RealtimeResponse +from .response_done_event import ResponseDoneEvent as ResponseDoneEvent +from .session_update_event import SessionUpdateEvent as SessionUpdateEvent +from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent +from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent +from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent +from .response_create_event import ResponseCreateEvent as ResponseCreateEvent +from .session_create_params import SessionCreateParams as SessionCreateParams +from .session_created_event import SessionCreatedEvent as SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent +from .transcription_session import TranscriptionSession as TranscriptionSession +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .conversation_item_param import ConversationItemParam as ConversationItemParam +from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams +from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage +from .session_create_response import SessionCreateResponse as SessionCreateResponse +from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .conversation_item_content import ConversationItemContent as ConversationItemContent +from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam +from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam +from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam +from .transcription_session_update import TranscriptionSessionUpdate as TranscriptionSessionUpdate +from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent +from .conversation_item_content_param import ConversationItemContentParam as ConversationItemContentParam +from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent +from .response_output_item_done_event 
import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .conversation_item_retrieve_event import ConversationItemRetrieveEvent as ConversationItemRetrieveEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent +from .conversation_item_with_reference import ConversationItemWithReference as ConversationItemWithReference +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent +from .transcription_session_update_param import TranscriptionSessionUpdateParam as TranscriptionSessionUpdateParam +from .transcription_session_create_params import TranscriptionSessionCreateParams as TranscriptionSessionCreateParams +from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent +from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .conversation_item_retrieve_event_param import ( + ConversationItemRetrieveEventParam as ConversationItemRetrieveEventParam, +) +from .conversation_item_truncate_event_param import ( + ConversationItemTruncateEventParam as ConversationItemTruncateEventParam, +) +from .conversation_item_with_reference_param import ( + ConversationItemWithReferenceParam as ConversationItemWithReferenceParam, +) +from .input_audio_buffer_speech_started_event import ( + InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent, +) +from .input_audio_buffer_speech_stopped_event import ( + InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .conversation_item_input_audio_transcription_delta_event import ( + ConversationItemInputAudioTranscriptionDeltaEvent as ConversationItemInputAudioTranscriptionDeltaEvent, +) +from .conversation_item_input_audio_transcription_failed_event import ( + ConversationItemInputAudioTranscriptionFailedEvent as 
ConversationItemInputAudioTranscriptionFailedEvent, +) +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent, +) diff --git a/src/openai/types/beta/realtime/conversation_created_event.py b/src/openai/types/beta/realtime/conversation_created_event.py new file mode 100644 index 0000000000..4ba0540867 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_created_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationCreatedEvent", "Conversation"] + + +class Conversation(BaseModel): + id: Optional[str] = None + """The unique ID of the conversation.""" + + object: Optional[Literal["realtime.conversation"]] = None + """The object type, must be `realtime.conversation`.""" + + +class ConversationCreatedEvent(BaseModel): + conversation: Conversation + """The conversation resource.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["conversation.created"] + """The event type, must be `conversation.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item.py b/src/openai/types/beta/realtime/conversation_item.py new file mode 100644 index 0000000000..4edf6c4d5f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item_content import ConversationItemContent + +__all__ = ["ConversationItem"] + + +class ConversationItem(BaseModel): + id: Optional[str] = None + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: Optional[str] = None + """The arguments of the function call (for `function_call` items).""" + + call_id: Optional[str] = None + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Optional[List[ConversationItemContent]] = None + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: Optional[str] = None + """The name of the function being called (for `function_call` items).""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`.""" + + output: Optional[str] = None + """The output of the function call (for `function_call_output` items).""" + + role: Optional[Literal["user", "assistant", "system"]] = None + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Optional[Literal["completed", "incomplete"]] = None + """The status of the item (`completed`, `incomplete`). 
+ + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Optional[Literal["message", "function_call", "function_call_output"]] = None + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content.py b/src/openai/types/beta/realtime/conversation_item_content.py new file mode 100644 index 0000000000..ab40a4a1a7 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemContent"] + + +class ConversationItemContent(BaseModel): + id: Optional[str] = None + """ + ID of a previous conversation item to reference (for `item_reference` content + types in `response.create` events). These can reference both client and server + created items. + """ + + audio: Optional[str] = None + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: Optional[str] = None + """The text content, used for `input_text` and `text` content types.""" + + transcript: Optional[str] = None + """The transcript of the audio, used for `input_audio` content type.""" + + type: Optional[Literal["input_text", "input_audio", "item_reference", "text"]] = None + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content_param.py b/src/openai/types/beta/realtime/conversation_item_content_param.py new file mode 100644 index 0000000000..7a3a92a39d --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ConversationItemContentParam"] + + +class ConversationItemContentParam(TypedDict, total=False): + id: str + """ + ID of a previous conversation item to reference (for `item_reference` content + types in `response.create` events). These can reference both client and server + created items. + """ + + audio: str + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: str + """The text content, used for `input_text` and `text` content types.""" + + transcript: str + """The transcript of the audio, used for `input_audio` content type.""" + + type: Literal["input_text", "input_audio", "item_reference", "text"] + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_create_event.py b/src/openai/types/beta/realtime/conversation_item_create_event.py new file mode 100644 index 0000000000..f19d552a92 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
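The realtime conversation item and content types above describe the items exchanged over a realtime connection. A minimal sketch of creating a text item and reading the response, following the SDK's async connect pattern; the model name and message text are illustrative and OPENAI_API_KEY is assumed to be set:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
        await connection.session.update(session={"modalities": ["text"]})

        # The `item` dict mirrors the conversation item shape above: a user `message`
        # whose content is a list of `input_text` parts.
        await connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Say hello!"}],
            }
        )
        await connection.response.create()

        async for event in connection:
            if event.type == "response.text.delta":
                print(event.delta, end="", flush=True)
            elif event.type == "response.text.done":
                print()
            elif event.type == "response.done":
                break


asyncio.run(main())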
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreateEvent"] + + +class ConversationItemCreateEvent(BaseModel): + item: ConversationItem + """The item to add to the conversation.""" + + type: Literal["conversation.item.create"] + """The event type, must be `conversation.item.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + previous_item_id: Optional[str] = None + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_create_event_param.py b/src/openai/types/beta/realtime/conversation_item_create_event_param.py new file mode 100644 index 0000000000..693d0fd54d --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event_param.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .conversation_item_param import ConversationItemParam + +__all__ = ["ConversationItemCreateEventParam"] + + +class ConversationItemCreateEventParam(TypedDict, total=False): + item: Required[ConversationItemParam] + """The item to add to the conversation.""" + + type: Required[Literal["conversation.item.create"]] + """The event type, must be `conversation.item.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_created_event.py b/src/openai/types/beta/realtime/conversation_item_created_event.py new file mode 100644 index 0000000000..2f20388246 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_created_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreatedEvent"] + + +class ConversationItemCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + previous_item_id: str + """ + The ID of the preceding item in the Conversation context, allows the client to + understand the order of the conversation. 
+ """ + + type: Literal["conversation.item.created"] + """The event type, must be `conversation.item.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event.py b/src/openai/types/beta/realtime/conversation_item_delete_event.py new file mode 100644 index 0000000000..02ca8250ce --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeleteEvent"] + + +class ConversationItemDeleteEvent(BaseModel): + item_id: str + """The ID of the item to delete.""" + + type: Literal["conversation.item.delete"] + """The event type, must be `conversation.item.delete`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event_param.py b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py new file mode 100644 index 0000000000..c3f88d6627 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemDeleteEventParam"] + + +class ConversationItemDeleteEventParam(TypedDict, total=False): + item_id: Required[str] + """The ID of the item to delete.""" + + type: Required[Literal["conversation.item.delete"]] + """The event type, must be `conversation.item.delete`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_deleted_event.py b/src/openai/types/beta/realtime/conversation_item_deleted_event.py new file mode 100644 index 0000000000..a35a97817a --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_deleted_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeletedEvent"] + + +class ConversationItemDeletedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item that was deleted.""" + + type: Literal["conversation.item.deleted"] + """The event type, must be `conversation.item.deleted`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py new file mode 100644 index 0000000000..469811693c --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
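A minimal sketch of issuing the `conversation.item.delete` client event defined above over an already-open realtime connection; it assumes the connection exposes a raw `send()` for client events (as in the connect sketch above) and that the caller supplies the item ID:

from openai.types.beta.realtime import ConversationItemDeleteEventParam


async def delete_item(connection, item_id: str) -> None:
    # The raw client event matches ConversationItemDeleteEventParam above; `event_id`
    # is optional and omitted here.
    event: ConversationItemDeleteEventParam = {
        "type": "conversation.item.delete",
        "item_id": item_id,
    }
    await connection.send(event)
    # The server acknowledges with a `conversation.item.deleted` event for the same item_id.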
+ +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionCompletedEvent", "Logprob"] + + +class Logprob(BaseModel): + token: str + """The token that was used to generate the log probability.""" + + bytes: List[int] + """The bytes that were used to generate the log probability.""" + + logprob: float + """The log probability of the token.""" + + +class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item containing the audio.""" + + transcript: str + """The transcribed text.""" + + type: Literal["conversation.item.input_audio_transcription.completed"] + """ + The event type, must be `conversation.item.input_audio_transcription.completed`. + """ + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the transcription.""" diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_delta_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_delta_event.py new file mode 100644 index 0000000000..924d06d98a --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_delta_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionDeltaEvent", "Logprob"] + + +class Logprob(BaseModel): + token: str + """The token that was used to generate the log probability.""" + + bytes: List[int] + """The bytes that were used to generate the log probability.""" + + logprob: float + """The log probability of the token.""" + + +class ConversationItemInputAudioTranscriptionDeltaEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + type: Literal["conversation.item.input_audio_transcription.delta"] + """The event type, must be `conversation.item.input_audio_transcription.delta`.""" + + content_index: Optional[int] = None + """The index of the content part in the item's content array.""" + + delta: Optional[str] = None + """The text delta.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the transcription.""" diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py new file mode 100644 index 0000000000..cecac93e64 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
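A minimal sketch of consuming the input-audio transcription events above from an open realtime connection; the helper only reads fields defined on the delta and completed events:

async def print_transcripts(connection) -> None:
    # `connection` is an open realtime connection, as in the connect sketch above.
    async for event in connection:
        if event.type == "conversation.item.input_audio_transcription.delta":
            # Incremental transcript text for the user's audio item.
            print(event.delta or "", end="", flush=True)
        elif event.type == "conversation.item.input_audio_transcription.completed":
            print(f"\n[{event.item_id}] transcript: {event.transcript}")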
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + message: Optional[str] = None + """A human-readable error message.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + error: Error + """Details of the transcription error.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item.""" + + type: Literal["conversation.item.input_audio_transcription.failed"] + """The event type, must be `conversation.item.input_audio_transcription.failed`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_param.py b/src/openai/types/beta/realtime/conversation_item_param.py new file mode 100644 index 0000000000..ac0f8431e5 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_param.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, TypedDict + +from .conversation_item_content_param import ConversationItemContentParam + +__all__ = ["ConversationItemParam"] + + +class ConversationItemParam(TypedDict, total=False): + id: str + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: str + """The arguments of the function call (for `function_call` items).""" + + call_id: str + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Iterable[ConversationItemContentParam] + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: str + """The name of the function being called (for `function_call` items).""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`.""" + + output: str + """The output of the function call (for `function_call_output` items).""" + + role: Literal["user", "assistant", "system"] + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Literal["completed", "incomplete"] + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. 
+ """ + + type: Literal["message", "function_call", "function_call_output"] + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_retrieve_event.py b/src/openai/types/beta/realtime/conversation_item_retrieve_event.py new file mode 100644 index 0000000000..822386055c --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_retrieve_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemRetrieveEvent"] + + +class ConversationItemRetrieveEvent(BaseModel): + item_id: str + """The ID of the item to retrieve.""" + + type: Literal["conversation.item.retrieve"] + """The event type, must be `conversation.item.retrieve`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_retrieve_event_param.py b/src/openai/types/beta/realtime/conversation_item_retrieve_event_param.py new file mode 100644 index 0000000000..71b3ffa499 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_retrieve_event_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemRetrieveEventParam"] + + +class ConversationItemRetrieveEventParam(TypedDict, total=False): + item_id: Required[str] + """The ID of the item to retrieve.""" + + type: Required[Literal["conversation.item.retrieve"]] + """The event type, must be `conversation.item.retrieve`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event.py b/src/openai/types/beta/realtime/conversation_item_truncate_event.py new file mode 100644 index 0000000000..cb336bba2c --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncateEvent"] + + +class ConversationItemTruncateEvent(BaseModel): + audio_end_ms: int + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: int + """The index of the content part to truncate. Set this to 0.""" + + item_id: str + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Literal["conversation.item.truncate"] + """The event type, must be `conversation.item.truncate`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py new file mode 100644 index 0000000000..d3ad1e1e25 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemTruncateEventParam"] + + +class ConversationItemTruncateEventParam(TypedDict, total=False): + audio_end_ms: Required[int] + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: Required[int] + """The index of the content part to truncate. Set this to 0.""" + + item_id: Required[str] + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Required[Literal["conversation.item.truncate"]] + """The event type, must be `conversation.item.truncate`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncated_event.py b/src/openai/types/beta/realtime/conversation_item_truncated_event.py new file mode 100644 index 0000000000..36368fa28f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncated_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncatedEvent"] + + +class ConversationItemTruncatedEvent(BaseModel): + audio_end_ms: int + """The duration up to which the audio was truncated, in milliseconds.""" + + content_index: int + """The index of the content part that was truncated.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the assistant message item that was truncated.""" + + type: Literal["conversation.item.truncated"] + """The event type, must be `conversation.item.truncated`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_with_reference.py b/src/openai/types/beta/realtime/conversation_item_with_reference.py new file mode 100644 index 0000000000..31806afc33 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_with_reference.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item_content import ConversationItemContent + +__all__ = ["ConversationItemWithReference"] + + +class ConversationItemWithReference(BaseModel): + id: Optional[str] = None + """ + For an item of type (`message` | `function_call` | `function_call_output`) this + field allows the client to assign the unique ID of the item. It is not required + because the server will generate one if not provided. + + For an item of type `item_reference`, this field is required and is a reference + to any item that has previously existed in the conversation. + """ + + arguments: Optional[str] = None + """The arguments of the function call (for `function_call` items).""" + + call_id: Optional[str] = None + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Optional[List[ConversationItemContent]] = None + """The content of the message, applicable for `message` items. 
+ + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: Optional[str] = None + """The name of the function being called (for `function_call` items).""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`.""" + + output: Optional[str] = None + """The output of the function call (for `function_call_output` items).""" + + role: Optional[Literal["user", "assistant", "system"]] = None + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Optional[Literal["completed", "incomplete"]] = None + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Optional[Literal["message", "function_call", "function_call_output", "item_reference"]] = None + """ + The type of the item (`message`, `function_call`, `function_call_output`, + `item_reference`). + """ diff --git a/src/openai/types/beta/realtime/conversation_item_with_reference_param.py b/src/openai/types/beta/realtime/conversation_item_with_reference_param.py new file mode 100644 index 0000000000..e266cdce32 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_with_reference_param.py @@ -0,0 +1,68 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, TypedDict + +from .conversation_item_content_param import ConversationItemContentParam + +__all__ = ["ConversationItemWithReferenceParam"] + + +class ConversationItemWithReferenceParam(TypedDict, total=False): + id: str + """ + For an item of type (`message` | `function_call` | `function_call_output`) this + field allows the client to assign the unique ID of the item. It is not required + because the server will generate one if not provided. + + For an item of type `item_reference`, this field is required and is a reference + to any item that has previously existed in the conversation. + """ + + arguments: str + """The arguments of the function call (for `function_call` items).""" + + call_id: str + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Iterable[ConversationItemContentParam] + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: str + """The name of the function being called (for `function_call` items).""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`.""" + + output: str + """The output of the function call (for `function_call_output` items).""" + + role: Literal["user", "assistant", "system"] + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. 
+ """ + + status: Literal["completed", "incomplete"] + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Literal["message", "function_call", "function_call_output", "item_reference"] + """ + The type of the item (`message`, `function_call`, `function_call_output`, + `item_reference`). + """ diff --git a/src/openai/types/beta/realtime/error_event.py b/src/openai/types/beta/realtime/error_event.py new file mode 100644 index 0000000000..e020fc3848 --- /dev/null +++ b/src/openai/types/beta/realtime/error_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ErrorEvent", "Error"] + + +class Error(BaseModel): + message: str + """A human-readable error message.""" + + type: str + """The type of error (e.g., "invalid_request_error", "server_error").""" + + code: Optional[str] = None + """Error code, if any.""" + + event_id: Optional[str] = None + """The event_id of the client event that caused the error, if applicable.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + +class ErrorEvent(BaseModel): + error: Error + """Details of the error.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["error"] + """The event type, must be `error`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py new file mode 100644 index 0000000000..a253a6488c --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferAppendEvent"] + + +class InputAudioBufferAppendEvent(BaseModel): + audio: str + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Literal["input_audio_buffer.append"] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py new file mode 100644 index 0000000000..3ad0bc737d --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferAppendEventParam"] + + +class InputAudioBufferAppendEventParam(TypedDict, total=False): + audio: Required[str] + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. 
+ """ + + type: Required[Literal["input_audio_buffer.append"]] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py new file mode 100644 index 0000000000..b0624d34df --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearEvent"] + + +class InputAudioBufferClearEvent(BaseModel): + type: Literal["input_audio_buffer.clear"] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py new file mode 100644 index 0000000000..2bd6bc5a02 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferClearEventParam"] + + +class InputAudioBufferClearEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.clear"]] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py new file mode 100644 index 0000000000..632e1b94bc --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearedEvent"] + + +class InputAudioBufferClearedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + type: Literal["input_audio_buffer.cleared"] + """The event type, must be `input_audio_buffer.cleared`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py new file mode 100644 index 0000000000..7b6f5e46b7 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommitEvent"] + + +class InputAudioBufferCommitEvent(BaseModel): + type: Literal["input_audio_buffer.commit"] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py new file mode 100644 index 0000000000..c9c927ab98 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferCommitEventParam"] + + +class InputAudioBufferCommitEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.commit"]] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py new file mode 100644 index 0000000000..3071eff357 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommittedEvent"] + + +class InputAudioBufferCommittedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted.""" + + type: Literal["input_audio_buffer.committed"] + """The event type, must be `input_audio_buffer.committed`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py new file mode 100644 index 0000000000..4f3ab082c4 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStartedEvent"] + + +class InputAudioBufferSpeechStartedEvent(BaseModel): + audio_start_ms: int + """ + Milliseconds from the start of all audio written to the buffer during the + session when speech was first detected. This will correspond to the beginning of + audio sent to the model, and thus includes the `prefix_padding_ms` configured in + the Session. 
+ """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created when speech stops.""" + + type: Literal["input_audio_buffer.speech_started"] + """The event type, must be `input_audio_buffer.speech_started`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py new file mode 100644 index 0000000000..40568170f2 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStoppedEvent"] + + +class InputAudioBufferSpeechStoppedEvent(BaseModel): + audio_end_ms: int + """Milliseconds since the session started when speech stopped. + + This will correspond to the end of audio sent to the model, and thus includes + the `min_silence_duration_ms` configured in the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + type: Literal["input_audio_buffer.speech_stopped"] + """The event type, must be `input_audio_buffer.speech_stopped`.""" diff --git a/src/openai/types/beta/realtime/rate_limits_updated_event.py b/src/openai/types/beta/realtime/rate_limits_updated_event.py new file mode 100644 index 0000000000..7e12283c46 --- /dev/null +++ b/src/openai/types/beta/realtime/rate_limits_updated_event.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RateLimitsUpdatedEvent", "RateLimit"] + + +class RateLimit(BaseModel): + limit: Optional[int] = None + """The maximum allowed value for the rate limit.""" + + name: Optional[Literal["requests", "tokens"]] = None + """The name of the rate limit (`requests`, `tokens`).""" + + remaining: Optional[int] = None + """The remaining value before the limit is reached.""" + + reset_seconds: Optional[float] = None + """Seconds until the rate limit resets.""" + + +class RateLimitsUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + rate_limits: List[RateLimit] + """List of rate limit information.""" + + type: Literal["rate_limits.updated"] + """The event type, must be `rate_limits.updated`.""" diff --git a/src/openai/types/beta/realtime/realtime_client_event.py b/src/openai/types/beta/realtime/realtime_client_event.py new file mode 100644 index 0000000000..5f4858d688 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel +from .session_update_event import SessionUpdateEvent +from .response_cancel_event import ResponseCancelEvent +from .response_create_event import ResponseCreateEvent +from .transcription_session_update import TranscriptionSessionUpdate +from .conversation_item_create_event import ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent +from .conversation_item_retrieve_event import ConversationItemRetrieveEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent + +__all__ = ["RealtimeClientEvent", "OutputAudioBufferClear"] + + +class OutputAudioBufferClear(BaseModel): + type: Literal["output_audio_buffer.clear"] + """The event type, must be `output_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """The unique ID of the client event used for error handling.""" + + +RealtimeClientEvent: TypeAlias = Annotated[ + Union[ + ConversationItemCreateEvent, + ConversationItemDeleteEvent, + ConversationItemRetrieveEvent, + ConversationItemTruncateEvent, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + OutputAudioBufferClear, + InputAudioBufferCommitEvent, + ResponseCancelEvent, + ResponseCreateEvent, + SessionUpdateEvent, + TranscriptionSessionUpdate, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/realtime_client_event_param.py b/src/openai/types/beta/realtime/realtime_client_event_param.py new file mode 100644 index 0000000000..e7dfba241e --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event_param.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .session_update_event_param import SessionUpdateEventParam +from .response_cancel_event_param import ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam +from .transcription_session_update_param import TranscriptionSessionUpdateParam +from .conversation_item_create_event_param import ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam +from .conversation_item_retrieve_event_param import ConversationItemRetrieveEventParam +from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam + +__all__ = ["RealtimeClientEventParam", "OutputAudioBufferClear"] + + +class OutputAudioBufferClear(TypedDict, total=False): + type: Required[Literal["output_audio_buffer.clear"]] + """The event type, must be `output_audio_buffer.clear`.""" + + event_id: str + """The unique ID of the client event used for error handling.""" + + +RealtimeClientEventParam: TypeAlias = Union[ + ConversationItemCreateEventParam, + ConversationItemDeleteEventParam, + ConversationItemRetrieveEventParam, + ConversationItemTruncateEventParam, + InputAudioBufferAppendEventParam, + InputAudioBufferClearEventParam, + OutputAudioBufferClear, + InputAudioBufferCommitEventParam, + ResponseCancelEventParam, + ResponseCreateEventParam, + SessionUpdateEventParam, + TranscriptionSessionUpdateParam, +] diff --git a/src/openai/types/beta/realtime/realtime_connect_params.py b/src/openai/types/beta/realtime/realtime_connect_params.py new file mode 100644 index 0000000000..76474f3de4 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_connect_params.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["RealtimeConnectParams"] + + +class RealtimeConnectParams(TypedDict, total=False): + model: Required[str] diff --git a/src/openai/types/beta/realtime/realtime_response.py b/src/openai/types/beta/realtime/realtime_response.py new file mode 100644 index 0000000000..8ecfb91c31 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response.py @@ -0,0 +1,92 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ...shared.metadata import Metadata +from .conversation_item import ConversationItem +from .realtime_response_usage import RealtimeResponseUsage +from .realtime_response_status import RealtimeResponseStatus + +__all__ = ["RealtimeResponse"] + + +class RealtimeResponse(BaseModel): + id: Optional[str] = None + """The unique ID of the response.""" + + conversation_id: Optional[str] = None + """ + Which conversation the response is added to, determined by the `conversation` + field in the `response.create` event. If `auto`, the response will be added to + the default conversation and the value of `conversation_id` will be an id like + `conv_1234`. 
If `none`, the response will not be added to any conversation and + the value of `conversation_id` will be `null`. If responses are being triggered + by server VAD, the response will be added to the default conversation, thus the + `conversation_id` will be an id like `conv_1234`. + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls, that was used in this response. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model used to respond. + + If there are multiple modalities, the model will pick one, for example if + `modalities` is `["text", "audio"]`, the model could be responding in either + text or audio. + """ + + object: Optional[Literal["realtime.response"]] = None + """The object type, must be `realtime.response`.""" + + output: Optional[List[ConversationItem]] = None + """The list of output items generated by the response.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + status: Optional[Literal["completed", "cancelled", "failed", "incomplete"]] = None + """ + The final status of the response (`completed`, `cancelled`, `failed`, or + `incomplete`). + """ + + status_details: Optional[RealtimeResponseStatus] = None + """Additional details about the status.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + usage: Optional[RealtimeResponseUsage] = None + """Usage statistics for the Response, this will correspond to billing. + + A Realtime API session will maintain a conversation context and append new Items + to the Conversation, thus output from previous turns (text and audio tokens) + will become the input for later turns. + """ + + voice: Union[ + str, + Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], + None, + ] = None + """ + The voice the model used to respond. Current voice options are `alloy`, `ash`, + `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and + `verse`. + """ diff --git a/src/openai/types/beta/realtime/realtime_response_status.py b/src/openai/types/beta/realtime/realtime_response_status.py new file mode 100644 index 0000000000..7189cd58a1 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_status.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseStatus", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class RealtimeResponseStatus(BaseModel): + error: Optional[Error] = None + """ + A description of the error that caused the response to fail, populated when the + `status` is `failed`. 
+ """ + + reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None + """The reason the Response did not complete. + + For a `cancelled` Response, one of `turn_detected` (the server VAD detected a + new start of speech) or `client_cancelled` (the client sent a cancel event). For + an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the + server-side safety filter activated and cut off the response). + """ + + type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None + """ + The type of error that caused the response to fail, corresponding with the + `status` field (`completed`, `cancelled`, `incomplete`, `failed`). + """ diff --git a/src/openai/types/beta/realtime/realtime_response_usage.py b/src/openai/types/beta/realtime/realtime_response_usage.py new file mode 100644 index 0000000000..7ca822e25e --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_usage.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseUsage", "InputTokenDetails", "OutputTokenDetails"] + + +class InputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + cached_tokens: Optional[int] = None + """The number of cached tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class OutputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class RealtimeResponseUsage(BaseModel): + input_token_details: Optional[InputTokenDetails] = None + """Details about the input tokens used in the Response.""" + + input_tokens: Optional[int] = None + """ + The number of input tokens used in the Response, including text and audio + tokens. + """ + + output_token_details: Optional[OutputTokenDetails] = None + """Details about the output tokens used in the Response.""" + + output_tokens: Optional[int] = None + """ + The number of output tokens sent in the Response, including text and audio + tokens. + """ + + total_tokens: Optional[int] = None + """ + The total number of tokens in the Response including input and output text and + audio tokens. + """ diff --git a/src/openai/types/beta/realtime/realtime_server_event.py b/src/openai/types/beta/realtime/realtime_server_event.py new file mode 100644 index 0000000000..c12f5df977 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_server_event.py @@ -0,0 +1,133 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel +from .error_event import ErrorEvent +from .conversation_item import ConversationItem +from .response_done_event import ResponseDoneEvent +from .session_created_event import SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent +from .response_created_event import ResponseCreatedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .rate_limits_updated_event import RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .conversation_item_created_event import ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent +from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent +from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent +from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent, +) + +__all__ = [ + "RealtimeServerEvent", + "ConversationItemRetrieved", + "OutputAudioBufferStarted", + "OutputAudioBufferStopped", + "OutputAudioBufferCleared", +] + + +class ConversationItemRetrieved(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + type: Literal["conversation.item.retrieved"] + """The event type, must be `conversation.item.retrieved`.""" + + +class OutputAudioBufferStarted(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.started"] + """The event type, must be `output_audio_buffer.started`.""" + + +class OutputAudioBufferStopped(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.stopped"] + """The 
event type, must be `output_audio_buffer.stopped`.""" + + +class OutputAudioBufferCleared(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.cleared"] + """The event type, must be `output_audio_buffer.cleared`.""" + + +RealtimeServerEvent: TypeAlias = Annotated[ + Union[ + ConversationCreatedEvent, + ConversationItemCreatedEvent, + ConversationItemDeletedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionDeltaEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemRetrieved, + ConversationItemTruncatedEvent, + ErrorEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + RateLimitsUpdatedEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdatedEvent, + TranscriptionSessionUpdatedEvent, + OutputAudioBufferStarted, + OutputAudioBufferStopped, + OutputAudioBufferCleared, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/response_audio_delta_event.py b/src/openai/types/beta/realtime/response_audio_delta_event.py new file mode 100644 index 0000000000..8e0128d942 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """Base64-encoded audio data delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.delta"] + """The event type, must be `response.audio.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_done_event.py b/src/openai/types/beta/realtime/response_audio_done_event.py new file mode 100644 index 0000000000..68e78bc778 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_done_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.done"] + """The event type, must be `response.audio.done`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..3609948d10 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The transcript delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio_transcript.delta"] + """The event type, must be `response.audio_transcript.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_done_event.py b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..4e4436a95f --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + transcript: str + """The final transcript of the audio.""" + + type: Literal["response.audio_transcript.done"] + """The event type, must be `response.audio_transcript.done`.""" diff --git a/src/openai/types/beta/realtime/response_cancel_event.py b/src/openai/types/beta/realtime/response_cancel_event.py new file mode 100644 index 0000000000..c5ff991e9a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseCancelEvent"] + + +class ResponseCancelEvent(BaseModel): + type: Literal["response.cancel"] + """The event type, must be `response.cancel`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response_id: Optional[str] = None + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_cancel_event_param.py b/src/openai/types/beta/realtime/response_cancel_event_param.py new file mode 100644 index 0000000000..f33740730a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseCancelEventParam"] + + +class ResponseCancelEventParam(TypedDict, total=False): + type: Required[Literal["response.cancel"]] + """The event type, must be `response.cancel`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response_id: str + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_content_part_added_event.py b/src/openai/types/beta/realtime/response_content_part_added_event.py new file mode 100644 index 0000000000..45c8f20f97 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_added_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartAddedEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item to which the content part was added.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that was added.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.added"] + """The event type, must be `response.content_part.added`.""" diff --git a/src/openai/types/beta/realtime/response_content_part_done_event.py b/src/openai/types/beta/realtime/response_content_part_done_event.py new file mode 100644 index 0000000000..3d16116106 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_done_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that is done.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.done"] + """The event type, must be `response.content_part.done`.""" diff --git a/src/openai/types/beta/realtime/response_create_event.py b/src/openai/types/beta/realtime/response_create_event.py new file mode 100644 index 0000000000..3b8a6de8df --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event.py @@ -0,0 +1,125 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ...shared.metadata import Metadata +from .conversation_item_with_reference import ConversationItemWithReference + +__all__ = ["ResponseCreateEvent", "Response", "ResponseTool"] + + +class ResponseTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class Response(BaseModel): + conversation: Union[str, Literal["auto", "none"], None] = None + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Optional[List[ConversationItemWithReference]] = None + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items from the default + conversation. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). 
The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function, like + `{"type": "function", "function": {"name": "my_function"}}`. + """ + + tools: Optional[List[ResponseTool]] = None + """Tools (functions) available to the model.""" + + voice: Union[ + str, + Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], + None, + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. + """ + + +class ResponseCreateEvent(BaseModel): + type: Literal["response.create"] + """The event type, must be `response.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response: Optional[Response] = None + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_create_event_param.py b/src/openai/types/beta/realtime/response_create_event_param.py new file mode 100644 index 0000000000..c569d507a0 --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event_param.py @@ -0,0 +1,124 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from ...shared_params.metadata import Metadata +from .conversation_item_with_reference_param import ConversationItemWithReferenceParam + +__all__ = ["ResponseCreateEventParam", "Response", "ResponseTool"] + + +class ResponseTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). 
+ """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class Response(TypedDict, total=False): + conversation: Union[str, Literal["auto", "none"]] + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Iterable[ConversationItemWithReferenceParam] + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items from the default + conversation. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function, like + `{"type": "function", "function": {"name": "my_function"}}`. + """ + + tools: Iterable[ResponseTool] + """Tools (functions) available to the model.""" + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. 
Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. + """ + + +class ResponseCreateEventParam(TypedDict, total=False): + type: Required[Literal["response.create"]] + """The event type, must be `response.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response: Response + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_created_event.py b/src/openai/types/beta/realtime/response_created_event.py new file mode 100644 index 0000000000..a4990cf095 --- /dev/null +++ b/src/openai/types/beta/realtime/response_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.created"] + """The event type, must be `response.created`.""" diff --git a/src/openai/types/beta/realtime/response_done_event.py b/src/openai/types/beta/realtime/response_done_event.py new file mode 100644 index 0000000000..9e655184b6 --- /dev/null +++ b/src/openai/types/beta/realtime/response_done_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseDoneEvent"] + + +class ResponseDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.done"] + """The event type, must be `response.done`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..cdbb64e658 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + call_id: str + """The ID of the function call.""" + + delta: str + """The arguments delta as a JSON string.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.delta"] + """The event type, must be `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..0a5db53323 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + arguments: str + """The final arguments as a JSON string.""" + + call_id: str + """The ID of the function call.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.done"] + """The event type, must be `response.function_call_arguments.done`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_added_event.py b/src/openai/types/beta/realtime/response_output_item_added_event.py new file mode 100644 index 0000000000..c89bfdc3be --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_added_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.added"] + """The event type, must be `response.output_item.added`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_done_event.py b/src/openai/types/beta/realtime/response_output_item_done_event.py new file mode 100644 index 0000000000..b5910e22aa --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_done_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.done"] + """The event type, must be `response.output_item.done`.""" diff --git a/src/openai/types/beta/realtime/response_text_delta_event.py b/src/openai/types/beta/realtime/response_text_delta_event.py new file mode 100644 index 0000000000..c463b3c3d0 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
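# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the generated models above (assumptions:
# `event` is a parsed ResponseFunctionCallArgumentsDoneEvent and `run_tool` is
# a hypothetical local dispatcher). Once the final arguments arrive, a client
# would typically execute the function and return the result to the model as a
# `function_call_output` conversation item.
import json


def build_function_output_item(event, run_tool) -> dict:
    arguments = json.loads(event.arguments)  # final arguments as a JSON string
    result = run_tool(arguments)             # hypothetical local execution
    return {
        "type": "conversation.item.create",
        "item": {
            "type": "function_call_output",
            "call_id": event.call_id,
            "output": json.dumps(result),
        },
    }
# ---------------------------------------------------------------------------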
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The text delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.text.delta"] + """The event type, must be `response.text.delta`.""" diff --git a/src/openai/types/beta/realtime/response_text_done_event.py b/src/openai/types/beta/realtime/response_text_done_event.py new file mode 100644 index 0000000000..020ff41d58 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + text: str + """The final text content.""" + + type: Literal["response.text.done"] + """The event type, must be `response.text.done`.""" diff --git a/src/openai/types/beta/realtime/session.py b/src/openai/types/beta/realtime/session.py new file mode 100644 index 0000000000..6acde57f09 --- /dev/null +++ b/src/openai/types/beta/realtime/session.py @@ -0,0 +1,231 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["Session", "InputAudioNoiseReduction", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class InputAudioNoiseReduction(BaseModel): + type: Optional[Literal["near_field", "far_field"]] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class InputAudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[str] = None + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: Optional[str] = None + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). 
+ """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + create_response: Optional[bool] = None + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: Optional[float] = None + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Optional[Literal["server_vad", "semantic_vad"]] = None + """Type of turn detection.""" + + +class Session(BaseModel): + id: Optional[str] = None + """Unique identifier for the session that looks like `sess_1234567890abcdef`.""" + + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_noise_reduction: Optional[InputAudioNoiseReduction] = None + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. 
This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Optional[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] = None + """The Realtime model used for this session.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. + + For audio models a temperature of 0.8 is highly recommended for best + performance. + """ + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + voice: Union[ + str, + Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], + None, + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. 
+ """ diff --git a/src/openai/types/beta/realtime/session_create_params.py b/src/openai/types/beta/realtime/session_create_params.py new file mode 100644 index 0000000000..eadee29b28 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_params.py @@ -0,0 +1,224 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, TypedDict + +__all__ = ["SessionCreateParams", "InputAudioNoiseReduction", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class SessionCreateParams(TypedDict, total=False): + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_noise_reduction: InputAudioNoiseReduction + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + input_audio_transcription: InputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. 
+ """ + + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + """The Realtime model used for this session.""" + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. + + For audio models a temperature of 0.8 is highly recommended for best + performance. + """ + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[Tool] + """Tools (functions) available to the model.""" + + turn_detection: TurnDetection + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. + """ + + +class InputAudioNoiseReduction(TypedDict, total=False): + type: Literal["near_field", "far_field"] + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class InputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: str + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: str + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class Tool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). 
+ """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(TypedDict, total=False): + create_response: bool + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Literal["server_vad", "semantic_vad"] + """Type of turn detection.""" diff --git a/src/openai/types/beta/realtime/session_create_response.py b/src/openai/types/beta/realtime/session_create_response.py new file mode 100644 index 0000000000..3cc8ca15ce --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_response.py @@ -0,0 +1,154 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class ClientSecret(BaseModel): + expires_at: int + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: str + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. 
+ """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class SessionCreateResponse(BaseModel): + client_secret: ClientSecret + """Ephemeral key returned by the API.""" + + input_audio_format: Optional[str] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[str] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Union[ + str, + Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], + None, + ] = None + """The voice the model uses to respond. 
+ + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ diff --git a/src/openai/types/beta/realtime/session_created_event.py b/src/openai/types/beta/realtime/session_created_event.py new file mode 100644 index 0000000000..baf6af388b --- /dev/null +++ b/src/openai/types/beta/realtime/session_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionCreatedEvent"] + + +class SessionCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.created"] + """The event type, must be `session.created`.""" diff --git a/src/openai/types/beta/realtime/session_update_event.py b/src/openai/types/beta/realtime/session_update_event.py new file mode 100644 index 0000000000..ba34b0260b --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event.py @@ -0,0 +1,246 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = [ + "SessionUpdateEvent", + "Session", + "SessionInputAudioNoiseReduction", + "SessionInputAudioTranscription", + "SessionTool", + "SessionTurnDetection", +] + + +class SessionInputAudioNoiseReduction(BaseModel): + type: Optional[Literal["near_field", "far_field"]] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class SessionInputAudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[str] = None + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: Optional[str] = None + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class SessionTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(BaseModel): + create_response: Optional[bool] = None + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. 
+ + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: Optional[float] = None + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Optional[Literal["server_vad", "semantic_vad"]] = None + """Type of turn detection.""" + + +class Session(BaseModel): + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_noise_reduction: Optional[SessionInputAudioNoiseReduction] = None + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + input_audio_transcription: Optional[SessionInputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. 
+ """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Optional[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] = None + """The Realtime model used for this session.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. + + For audio models a temperature of 0.8 is highly recommended for best + performance. + """ + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[SessionTool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[SessionTurnDetection] = None + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + voice: Union[ + str, + Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], + None, + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. + """ + + +class SessionUpdateEvent(BaseModel): + session: Session + """Realtime session object configuration.""" + + type: Literal["session.update"] + """The event type, must be `session.update`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_update_event_param.py b/src/openai/types/beta/realtime/session_update_event_param.py new file mode 100644 index 0000000000..0984d39e91 --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event_param.py @@ -0,0 +1,242 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
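# ---------------------------------------------------------------------------
# Illustrative sketch of a `session.update` payload built from the fields
# documented above; the values shown are arbitrary examples, and the
# `connection.send(...)` helper referenced below is an assumption about how
# such an event would be transmitted rather than a documented call.
example_session_update = {
    "type": "session.update",
    "session": {
        "instructions": "Be extremely succinct and speak quickly.",
        "modalities": ["text", "audio"],
        "voice": "sage",
        "turn_detection": {
            "type": "semantic_vad",
            "eagerness": "low",
            "create_response": True,
        },
    },
}
# connection.send(example_session_update)  # assumed transport helper
# ---------------------------------------------------------------------------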
+ +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = [ + "SessionUpdateEventParam", + "Session", + "SessionInputAudioNoiseReduction", + "SessionInputAudioTranscription", + "SessionTool", + "SessionTurnDetection", +] + + +class SessionInputAudioNoiseReduction(TypedDict, total=False): + type: Literal["near_field", "far_field"] + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class SessionInputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: str + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: str + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class SessionTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(TypedDict, total=False): + create_response: bool + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Literal["server_vad", "semantic_vad"] + """Type of turn detection.""" + + +class Session(TypedDict, total=False): + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. 
For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_noise_reduction: SessionInputAudioNoiseReduction + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + input_audio_transcription: SessionInputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + """The Realtime model used for this session.""" + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. + + For audio models a temperature of 0.8 is highly recommended for best + performance. + """ + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[SessionTool] + """Tools (functions) available to the model.""" + + turn_detection: SessionTurnDetection + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. 
Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. + """ + + +class SessionUpdateEventParam(TypedDict, total=False): + session: Required[Session] + """Realtime session object configuration.""" + + type: Required[Literal["session.update"]] + """The event type, must be `session.update`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_updated_event.py b/src/openai/types/beta/realtime/session_updated_event.py new file mode 100644 index 0000000000..b9b6488eb3 --- /dev/null +++ b/src/openai/types/beta/realtime/session_updated_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionUpdatedEvent"] + + +class SessionUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.updated"] + """The event type, must be `session.updated`.""" diff --git a/src/openai/types/beta/realtime/transcription_session.py b/src/openai/types/beta/realtime/transcription_session.py new file mode 100644 index 0000000000..7c7abf37b6 --- /dev/null +++ b/src/openai/types/beta/realtime/transcription_session.py @@ -0,0 +1,100 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["TranscriptionSession", "ClientSecret", "InputAudioTranscription", "TurnDetection"] + + +class ClientSecret(BaseModel): + expires_at: int + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: str + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ + + +class InputAudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None + """The model to use for transcription. + + Can be `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, or `whisper-1`. 
+ """ + + prompt: Optional[str] = None + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + """ + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class TranscriptionSession(BaseModel): + client_secret: ClientSecret + """Ephemeral key returned by the API. + + Only present when the session is created on the server via REST API. + """ + + input_audio_format: Optional[str] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """Configuration of the transcription model.""" + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ diff --git a/src/openai/types/beta/realtime/transcription_session_create_params.py b/src/openai/types/beta/realtime/transcription_session_create_params.py new file mode 100644 index 0000000000..1cf511f0b5 --- /dev/null +++ b/src/openai/types/beta/realtime/transcription_session_create_params.py @@ -0,0 +1,144 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, TypedDict + +__all__ = ["TranscriptionSessionCreateParams", "InputAudioNoiseReduction", "InputAudioTranscription", "TurnDetection"] + + +class TranscriptionSessionCreateParams(TypedDict, total=False): + include: List[str] + """The set of items to include in the transcription. Current available items are: + + - `item.input_audio_transcription.logprobs` + """ + + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_noise_reduction: InputAudioNoiseReduction + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. 
+ """ + + input_audio_transcription: InputAudioTranscription + """Configuration for input audio transcription. + + The client can optionally set the language and prompt for transcription, these + offer additional guidance to the transcription service. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + turn_detection: TurnDetection + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + +class InputAudioNoiseReduction(TypedDict, total=False): + type: Literal["near_field", "far_field"] + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class InputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: str + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class TurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + Not available for transcription sessions. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. Not available for transcription sessions. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. 
+ With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Literal["server_vad", "semantic_vad"] + """Type of turn detection.""" diff --git a/src/openai/types/beta/realtime/transcription_session_update.py b/src/openai/types/beta/realtime/transcription_session_update.py new file mode 100644 index 0000000000..c3e8f011c8 --- /dev/null +++ b/src/openai/types/beta/realtime/transcription_session_update.py @@ -0,0 +1,161 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = [ + "TranscriptionSessionUpdate", + "Session", + "SessionInputAudioNoiseReduction", + "SessionInputAudioTranscription", + "SessionTurnDetection", +] + + +class SessionInputAudioNoiseReduction(BaseModel): + type: Optional[Literal["near_field", "far_field"]] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class SessionInputAudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"]] = None + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: Optional[str] = None + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class SessionTurnDetection(BaseModel): + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + Not available for transcription sessions. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. Not available for transcription sessions. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. 
+ With shorter values the model will respond more quickly, but may jump in on
+ short pauses from the user.
+ """
+
+ threshold: Optional[float] = None
+ """Used only for `server_vad` mode.
+
+ Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+ threshold will require louder audio to activate the model, and thus might
+ perform better in noisy environments.
+ """
+
+ type: Optional[Literal["server_vad", "semantic_vad"]] = None
+ """Type of turn detection."""
+
+
+class Session(BaseModel):
+ include: Optional[List[str]] = None
+ """The set of items to include in the transcription. Current available items are:
+
+ - `item.input_audio_transcription.logprobs`
+ """
+
+ input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
+ """The format of input audio.
+
+ Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+ be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+ byte order.
+ """
+
+ input_audio_noise_reduction: Optional[SessionInputAudioNoiseReduction] = None
+ """Configuration for input audio noise reduction.
+
+ This can be set to `null` to turn off. Noise reduction filters audio added to
+ the input audio buffer before it is sent to VAD and the model. Filtering the
+ audio can improve VAD and turn detection accuracy (reducing false positives) and
+ model performance by improving perception of the input audio.
+ """
+
+ input_audio_transcription: Optional[SessionInputAudioTranscription] = None
+ """Configuration for input audio transcription.
+
+ The client can optionally set the language and prompt for transcription, these
+ offer additional guidance to the transcription service.
+ """
+
+ modalities: Optional[List[Literal["text", "audio"]]] = None
+ """The set of modalities the model can respond with.
+
+ To disable audio, set this to ["text"].
+ """
+
+ turn_detection: Optional[SessionTurnDetection] = None
+ """Configuration for turn detection, either Server VAD or Semantic VAD.
+
+ This can be set to `null` to turn off, in which case the client must manually
+ trigger model response. Server VAD means that the model will detect the start
+ and end of speech based on audio volume and respond at the end of user speech.
+ Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ with VAD) to semantically estimate whether the user has finished speaking, then
+ dynamically sets a timeout based on this probability. For example, if user audio
+ trails off with "uhhm", the model will score a low probability of turn end and
+ wait longer for the user to continue speaking. This can be useful for more
+ natural conversations, but may have a higher latency.
+ """
+
+
+class TranscriptionSessionUpdate(BaseModel):
+ session: Session
+ """Realtime transcription session object configuration."""
+
+ type: Literal["transcription_session.update"]
+ """The event type, must be `transcription_session.update`."""
+
+ event_id: Optional[str] = None
+ """Optional client-generated ID used to identify this event."""
diff --git a/src/openai/types/beta/realtime/transcription_session_update_param.py b/src/openai/types/beta/realtime/transcription_session_update_param.py
new file mode 100644
index 0000000000..549c49011b
--- /dev/null
+++ b/src/openai/types/beta/realtime/transcription_session_update_param.py
@@ -0,0 +1,161 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
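A minimal sketch of the two turn-detection configurations described above, as plain dicts: `server_vad` reads `threshold`, `prefix_padding_ms` and `silence_duration_ms`, while `semantic_vad` only reads `eagerness`. The specific values below are illustrative, not recommendations.

# Illustrative turn_detection payloads matching the fields documented above.

# Server VAD: volume/silence based. A higher `threshold` needs louder audio to
# trigger; a shorter `silence_duration_ms` responds sooner but may cut users off.
server_vad = {
    "type": "server_vad",
    "threshold": 0.6,            # 0.0-1.0, default 0.5
    "prefix_padding_ms": 300,    # audio kept before detected speech, default 300ms
    "silence_duration_ms": 400,  # default 500ms
    "create_response": True,
    "interrupt_response": True,
}

# Semantic VAD: a turn-detection model estimates whether the user is finished.
# Only `eagerness` applies here; the `server_vad`-specific fields are ignored.
semantic_vad = {
    "type": "semantic_vad",
    "eagerness": "low",  # wait longer before responding; "auto" is equivalent to "medium"
}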
+ +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, Required, TypedDict + +__all__ = [ + "TranscriptionSessionUpdateParam", + "Session", + "SessionInputAudioNoiseReduction", + "SessionInputAudioTranscription", + "SessionTurnDetection", +] + + +class SessionInputAudioNoiseReduction(TypedDict, total=False): + type: Literal["near_field", "far_field"] + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class SessionInputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] + """ + The model to use for transcription, current options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, and `whisper-1`. + """ + + prompt: str + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models, the prompt is a free text string, for example + "expect words related to technology". + """ + + +class SessionTurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + Not available for transcription sessions. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. Not available for transcription sessions. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + type: Literal["server_vad", "semantic_vad"] + """Type of turn detection.""" + + +class Session(TypedDict, total=False): + include: List[str] + """The set of items to include in the transcription. Current available items are: + + - `item.input_audio_transcription.logprobs` + """ + + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. 
+ """ + + input_audio_noise_reduction: SessionInputAudioNoiseReduction + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + input_audio_transcription: SessionInputAudioTranscription + """Configuration for input audio transcription. + + The client can optionally set the language and prompt for transcription, these + offer additional guidance to the transcription service. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + turn_detection: SessionTurnDetection + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. Server VAD means that the model will detect the start + and end of speech based on audio volume and respond at the end of user speech. + Semantic VAD is more advanced and uses a turn detection model (in conjuction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + +class TranscriptionSessionUpdateParam(TypedDict, total=False): + session: Required[Session] + """Realtime transcription session object configuration.""" + + type: Required[Literal["transcription_session.update"]] + """The event type, must be `transcription_session.update`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/transcription_session_updated_event.py b/src/openai/types/beta/realtime/transcription_session_updated_event.py new file mode 100644 index 0000000000..ffc100bcc2 --- /dev/null +++ b/src/openai/types/beta/realtime/transcription_session_updated_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .transcription_session import TranscriptionSession + +__all__ = ["TranscriptionSessionUpdatedEvent"] + + +class TranscriptionSessionUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: TranscriptionSession + """A new Realtime transcription session configuration. + + When a session is created on the server via REST API, the session object also + contains an ephemeral key. Default TTL for keys is one minute. This property is + not present when a session is updated via the WebSocket API. + """ + + type: Literal["transcription_session.updated"] + """The event type, must be `transcription_session.updated`.""" diff --git a/src/openai/types/beta/thread.py b/src/openai/types/beta/thread.py index a340bffd60..789f66e48b 100644 --- a/src/openai/types/beta/thread.py +++ b/src/openai/types/beta/thread.py @@ -1,12 +1,37 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-import builtins -from typing import Optional +from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel +from ..shared.metadata import Metadata -__all__ = ["Thread"] +__all__ = ["Thread", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] + + +class ToolResourcesCodeInterpreter(BaseModel): + file_ids: Optional[List[str]] = None + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(BaseModel): + vector_store_ids: Optional[List[str]] = None + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + +class ToolResources(BaseModel): + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None + + file_search: Optional[ToolResourcesFileSearch] = None class Thread(BaseModel): @@ -16,13 +41,23 @@ class Thread(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the thread was created.""" - metadata: Optional[builtins.object] + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ object: Literal["thread"] """The object type, which is always `thread`.""" + + tool_resources: Optional[ToolResources] = None + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 9f58dcd875..d813710579 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -1,24 +1,43 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
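With `tool_resources` now part of the `Thread` model, the vector store backing `file_search` can be read off a retrieved thread. A small sketch, assuming an existing thread (the ID below is a placeholder):

from openai import OpenAI

client = OpenAI()

thread = client.beta.threads.retrieve("thread_abc123")  # placeholder ID

# `tool_resources` and its members are all Optional, so guard each level.
if thread.tool_resources and thread.tool_resources.file_search:
    print("file_search vector stores:", thread.tool_resources.file_search.vector_store_ids)

# `metadata` is now typed as the shared Metadata shape (up to 16 string pairs).
print(thread.metadata)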
from __future__ import annotations -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ...types import shared_params +from ..shared.chat_model import ChatModel +from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from .code_interpreter_tool_param import CodeInterpreterToolParam +from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from .threads.message_content_part_param import MessageContentPartParam +from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ - "ThreadCreateAndRunParams", + "ThreadCreateAndRunParamsBase", "Thread", "ThreadMessage", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "ThreadMessageAttachment", + "ThreadMessageAttachmentTool", + "ThreadMessageAttachmentToolFileSearch", + "ThreadToolResources", + "ThreadToolResourcesCodeInterpreter", + "ThreadToolResourcesFileSearch", + "ThreadToolResourcesFileSearchVectorStore", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategy", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "TruncationStrategy", + "ThreadCreateAndRunParamsNonStreaming", + "ThreadCreateAndRunParamsStreaming", ] -class ThreadCreateAndRunParams(TypedDict, total=False): +class ThreadCreateAndRunParamsBase(TypedDict, total=False): assistant_id: Required[str] """ The ID of the @@ -32,15 +51,35 @@ class ThreadCreateAndRunParams(TypedDict, total=False): This is useful for modifying the behavior on a per-run basis. """ - metadata: Optional[object] + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ - model: Optional[str] + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. 
If a value is provided here, it will override the @@ -48,74 +87,310 @@ class ThreadCreateAndRunParams(TypedDict, total=False): assistant will be used. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + thread: Thread - """If no thread is provided, an empty thread will be created.""" + """Options to create a new thread. + + If no thread is provided when running a request, an empty thread will be + created. + """ + + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tool_resources: Optional[ToolResources] + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ - tools: Optional[List[Tool]] + tools: Optional[Iterable[AssistantToolParam]] """Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. """ + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. 
+ """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + +class ThreadMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +ThreadMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, ThreadMessageAttachmentToolFileSearch] + + +class ThreadMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[ThreadMessageAttachmentTool] + """The tools to add this file to.""" + class ThreadMessage(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ + + attachments: Optional[Iterable[ThreadMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. - Currently only `user` is supported. + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. 
+ """ + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ThreadToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto, + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic, +] + + +class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ThreadToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. """ file_ids: List[str] """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ +class ThreadToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ThreadToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. + """ + + +class ThreadToolResources(TypedDict, total=False): + code_interpreter: ThreadToolResourcesCodeInterpreter + + file_search: ThreadToolResourcesFileSearch + + class Thread(TypedDict, total=False): - messages: List[ThreadMessage] + messages: Iterable[ThreadMessage] """ A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + tool_resources: Optional[ThreadToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. 
+ """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. """ -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this assistant. There can be a maximum of 1 vector store attached to + the assistant. + """ -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + file_search: ToolResourcesFileSearch -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. + + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class ThreadCreateAndRunParamsStreaming(ThreadCreateAndRunParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] +ThreadCreateAndRunParams = Union[ThreadCreateAndRunParamsNonStreaming, ThreadCreateAndRunParamsStreaming] diff --git a/src/openai/types/beta/thread_create_params.py b/src/openai/types/beta/thread_create_params.py index d2ec78bbc3..ec1ccf19a6 100644 --- a/src/openai/types/beta/thread_create_params.py +++ b/src/openai/types/beta/thread_create_params.py @@ -1,51 +1,185 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations -from typing import List, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["ThreadCreateParams", "Message"] +from ..shared_params.metadata import Metadata +from .code_interpreter_tool_param import CodeInterpreterToolParam +from .threads.message_content_part_param import MessageContentPartParam + +__all__ = [ + "ThreadCreateParams", + "Message", + "MessageAttachment", + "MessageAttachmentTool", + "MessageAttachmentToolFileSearch", + "ToolResources", + "ToolResourcesCodeInterpreter", + "ToolResourcesFileSearch", + "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", +] class ThreadCreateParams(TypedDict, total=False): - messages: List[Message] + messages: Iterable[Message] """ A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + tool_resources: Optional[ToolResources] """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +class MessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +MessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, MessageAttachmentToolFileSearch] + + +class MessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[MessageAttachmentTool] + """The tools to add this file to.""" class Message(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: - Currently only `user` is supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. """ + attachments: Optional[Iterable[MessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): file_ids: List[str] """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. """ - metadata: Optional[object] + +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. 
+ """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/thread_deleted.py b/src/openai/types/beta/thread_deleted.py index 410ac1aea0..d385626319 100644 --- a/src/openai/types/beta/thread_deleted.py +++ b/src/openai/types/beta/thread_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/thread_update_params.py b/src/openai/types/beta/thread_update_params.py index 6c1d32fc57..b47ea8f3b0 100644 --- a/src/openai/types/beta/thread_update_params.py +++ b/src/openai/types/beta/thread_update_params.py @@ -1,18 +1,55 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Optional +from typing import List, Optional from typing_extensions import TypedDict -__all__ = ["ThreadUpdateParams"] +from ..shared_params.metadata import Metadata + +__all__ = ["ThreadUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class ThreadUpdateParams(TypedDict, total=False): - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + tool_resources: Optional[ToolResources] + """ + A set of resources that are made available to the assistant's tools in this + thread. The resources are specific to the type of tool. For example, the + `code_interpreter` tool requires a list of file IDs, while the `file_search` + tool requires a list of vector store IDs. + """ + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + +class ToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/threads/__init__.py b/src/openai/types/beta/threads/__init__.py index 0cb557a514..70853177bd 100644 --- a/src/openai/types/beta/threads/__init__.py +++ b/src/openai/types/beta/threads/__init__.py @@ -1,22 +1,46 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations from .run import Run as Run -from .thread_message import ThreadMessage as ThreadMessage +from .text import Text as Text +from .message import Message as Message +from .image_url import ImageURL as ImageURL +from .annotation import Annotation as Annotation +from .image_file import ImageFile as ImageFile +from .run_status import RunStatus as RunStatus +from .text_delta import TextDelta as TextDelta +from .message_delta import MessageDelta as MessageDelta +from .image_url_delta import ImageURLDelta as ImageURLDelta +from .image_url_param import ImageURLParam as ImageURLParam +from .message_content import MessageContent as MessageContent +from .message_deleted import MessageDeleted as MessageDeleted from .run_list_params import RunListParams as RunListParams +from .annotation_delta import AnnotationDelta as AnnotationDelta +from .image_file_delta import ImageFileDelta as ImageFileDelta +from .image_file_param import ImageFileParam as ImageFileParam +from .text_delta_block import TextDeltaBlock as TextDeltaBlock from .run_create_params import RunCreateParams as RunCreateParams from .run_update_params import RunUpdateParams as RunUpdateParams +from .text_content_block import TextContentBlock as TextContentBlock +from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent from .message_list_params import MessageListParams as MessageListParams -from .message_content_text import MessageContentText as MessageContentText +from .refusal_delta_block import RefusalDeltaBlock as RefusalDeltaBlock +from .file_path_annotation import FilePathAnnotation as FilePathAnnotation +from .image_url_delta_block import ImageURLDeltaBlock as ImageURLDeltaBlock +from .message_content_delta import MessageContentDelta as MessageContentDelta from .message_create_params import MessageCreateParams as MessageCreateParams from .message_update_params import MessageUpdateParams as MessageUpdateParams -from .message_content_image_file import ( - MessageContentImageFile as MessageContentImageFile, -) -from .run_submit_tool_outputs_params import ( - RunSubmitToolOutputsParams as RunSubmitToolOutputsParams, -) -from .required_action_function_tool_call import ( - RequiredActionFunctionToolCall as RequiredActionFunctionToolCall, -) +from .refusal_content_block import RefusalContentBlock as RefusalContentBlock +from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock +from .image_url_content_block import ImageURLContentBlock as ImageURLContentBlock +from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation +from .image_file_content_block import ImageFileContentBlock as ImageFileContentBlock +from .text_content_block_param import TextContentBlockParam as TextContentBlockParam +from .file_path_delta_annotation import FilePathDeltaAnnotation as FilePathDeltaAnnotation +from .message_content_part_param import MessageContentPartParam as MessageContentPartParam +from .image_url_content_block_param import ImageURLContentBlockParam as ImageURLContentBlockParam +from .file_citation_delta_annotation import FileCitationDeltaAnnotation as FileCitationDeltaAnnotation +from .image_file_content_block_param import ImageFileContentBlockParam as ImageFileContentBlockParam +from .run_submit_tool_outputs_params import RunSubmitToolOutputsParams as RunSubmitToolOutputsParams +from .required_action_function_tool_call import RequiredActionFunctionToolCall as RequiredActionFunctionToolCall diff --git a/src/openai/types/beta/threads/annotation.py 
b/src/openai/types/beta/threads/annotation.py new file mode 100644 index 0000000000..13c10abf4d --- /dev/null +++ b/src/openai/types/beta/threads/annotation.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .file_path_annotation import FilePathAnnotation +from .file_citation_annotation import FileCitationAnnotation + +__all__ = ["Annotation"] + +Annotation: TypeAlias = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/beta/threads/annotation_delta.py b/src/openai/types/beta/threads/annotation_delta.py new file mode 100644 index 0000000000..c7c6c89837 --- /dev/null +++ b/src/openai/types/beta/threads/annotation_delta.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .file_path_delta_annotation import FilePathDeltaAnnotation +from .file_citation_delta_annotation import FileCitationDeltaAnnotation + +__all__ = ["AnnotationDelta"] + +AnnotationDelta: TypeAlias = Annotated[ + Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/file_citation_annotation.py b/src/openai/types/beta/threads/file_citation_annotation.py new file mode 100644 index 0000000000..c3085aed9b --- /dev/null +++ b/src/openai/types/beta/threads/file_citation_annotation.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: str + """The ID of the specific File the citation is from.""" + + +class FileCitationAnnotation(BaseModel): + end_index: int + + file_citation: FileCitation + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" diff --git a/src/openai/types/beta/threads/file_citation_delta_annotation.py b/src/openai/types/beta/threads/file_citation_delta_annotation.py new file mode 100644 index 0000000000..b40c0d123e --- /dev/null +++ b/src/openai/types/beta/threads/file_citation_delta_annotation.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
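Because `Annotation` is discriminated on `type`, the two variants can be separated with an `isinstance` check (or by comparing `type`) when post-processing assistant text. A small sketch over a `Text` content value from a retrieved message:

from openai.types.beta.threads import FileCitationAnnotation, FilePathAnnotation, Text

def describe_annotations(text: Text) -> None:
    """Print what each annotation on a Text content value points at."""
    for annotation in text.annotations:
        if isinstance(annotation, FileCitationAnnotation):
            # Produced by the file_search tool.
            print(f"{annotation.text!r} cites file {annotation.file_citation.file_id}")
        elif isinstance(annotation, FilePathAnnotation):
            # Produced by the code_interpreter tool for generated files.
            print(f"{annotation.text!r} links to file {annotation.file_path.file_id}")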
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FileCitationDeltaAnnotation", "FileCitation"] + + +class FileCitation(BaseModel): + file_id: Optional[str] = None + """The ID of the specific File the citation is from.""" + + quote: Optional[str] = None + """The specific quote in the file.""" + + +class FileCitationDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_citation"] + """Always `file_citation`.""" + + end_index: Optional[int] = None + + file_citation: Optional[FileCitation] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/src/openai/types/beta/threads/file_path_annotation.py b/src/openai/types/beta/threads/file_path_annotation.py new file mode 100644 index 0000000000..9812737ece --- /dev/null +++ b/src/openai/types/beta/threads/file_path_annotation.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: str + """The ID of the file that was generated.""" + + +class FilePathAnnotation(BaseModel): + end_index: int + + file_path: FilePath + + start_index: int + + text: str + """The text in the message content that needs to be replaced.""" + + type: Literal["file_path"] + """Always `file_path`.""" diff --git a/src/openai/types/beta/threads/file_path_delta_annotation.py b/src/openai/types/beta/threads/file_path_delta_annotation.py new file mode 100644 index 0000000000..0cbb445e48 --- /dev/null +++ b/src/openai/types/beta/threads/file_path_delta_annotation.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FilePathDeltaAnnotation", "FilePath"] + + +class FilePath(BaseModel): + file_id: Optional[str] = None + """The ID of the file that was generated.""" + + +class FilePathDeltaAnnotation(BaseModel): + index: int + """The index of the annotation in the text content part.""" + + type: Literal["file_path"] + """Always `file_path`.""" + + end_index: Optional[int] = None + + file_path: Optional[FilePath] = None + + start_index: Optional[int] = None + + text: Optional[str] = None + """The text in the message content that needs to be replaced.""" diff --git a/src/openai/types/beta/threads/image_file.py b/src/openai/types/beta/threads/image_file.py new file mode 100644 index 0000000000..6000d97500 --- /dev/null +++ b/src/openai/types/beta/threads/image_file.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageFile"] + + +class ImageFile(BaseModel): + file_id: str + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image if specified by the user. 
+ + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ diff --git a/src/openai/types/beta/threads/image_file_content_block.py b/src/openai/types/beta/threads/image_file_content_block.py new file mode 100644 index 0000000000..a909999065 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file import ImageFile + +__all__ = ["ImageFileContentBlock"] + + +class ImageFileContentBlock(BaseModel): + image_file: ImageFile + + type: Literal["image_file"] + """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/image_file_content_block_param.py b/src/openai/types/beta/threads/image_file_content_block_param.py new file mode 100644 index 0000000000..48d94bee36 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_content_block_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .image_file_param import ImageFileParam + +__all__ = ["ImageFileContentBlockParam"] + + +class ImageFileContentBlockParam(TypedDict, total=False): + image_file: Required[ImageFileParam] + + type: Required[Literal["image_file"]] + """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/image_file_delta.py b/src/openai/types/beta/threads/image_file_delta.py new file mode 100644 index 0000000000..4581184c7a --- /dev/null +++ b/src/openai/types/beta/threads/image_file_delta.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageFileDelta"] + + +class ImageFileDelta(BaseModel): + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image if specified by the user. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ + + file_id: Optional[str] = None + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ diff --git a/src/openai/types/beta/threads/image_file_delta_block.py b/src/openai/types/beta/threads/image_file_delta_block.py new file mode 100644 index 0000000000..0a5a2e8a5f --- /dev/null +++ b/src/openai/types/beta/threads/image_file_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_file_delta import ImageFileDelta + +__all__ = ["ImageFileDeltaBlock"] + + +class ImageFileDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["image_file"] + """Always `image_file`.""" + + image_file: Optional[ImageFileDelta] = None diff --git a/src/openai/types/beta/threads/image_file_param.py b/src/openai/types/beta/threads/image_file_param.py new file mode 100644 index 0000000000..e4a85358b9 --- /dev/null +++ b/src/openai/types/beta/threads/image_file_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ImageFileParam"] + + +class ImageFileParam(TypedDict, total=False): + file_id: Required[str] + """ + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image + in the message content. Set `purpose="vision"` when uploading the File if you + need to later display the file content. + """ + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image if specified by the user. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. + """ diff --git a/src/openai/types/beta/threads/image_url.py b/src/openai/types/beta/threads/image_url.py new file mode 100644 index 0000000000..d1fac147b2 --- /dev/null +++ b/src/openai/types/beta/threads/image_url.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageURL"] + + +class ImageURL(BaseModel): + url: str + """ + The external URL of the image, must be a supported image types: jpeg, jpg, png, + gif, webp. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. Default + value is `auto` + """ diff --git a/src/openai/types/beta/threads/image_url_content_block.py b/src/openai/types/beta/threads/image_url_content_block.py new file mode 100644 index 0000000000..40a16c1df8 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .image_url import ImageURL +from ...._models import BaseModel + +__all__ = ["ImageURLContentBlock"] + + +class ImageURLContentBlock(BaseModel): + image_url: ImageURL + + type: Literal["image_url"] + """The type of the content part.""" diff --git a/src/openai/types/beta/threads/image_url_content_block_param.py b/src/openai/types/beta/threads/image_url_content_block_param.py new file mode 100644 index 0000000000..585b926c58 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_content_block_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .image_url_param import ImageURLParam + +__all__ = ["ImageURLContentBlockParam"] + + +class ImageURLContentBlockParam(TypedDict, total=False): + image_url: Required[ImageURLParam] + + type: Required[Literal["image_url"]] + """The type of the content part.""" diff --git a/src/openai/types/beta/threads/image_url_delta.py b/src/openai/types/beta/threads/image_url_delta.py new file mode 100644 index 0000000000..e402671908 --- /dev/null +++ b/src/openai/types/beta/threads/image_url_delta.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ImageURLDelta"] + + +class ImageURLDelta(BaseModel): + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. 
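These `*_param` TypedDicts are the building blocks for user-supplied message content. A sketch of a mixed content array; the file ID is a placeholder and, per the `ImageFileParam` docs above, should reference a File uploaded with `purpose="vision"`:

# Illustrative message content parts mixing an uploaded image and an external URL.
content_parts = [
    {"type": "text", "text": "What differs between these two screenshots?"},
    {
        "type": "image_file",
        "image_file": {"file_id": "file-abc123", "detail": "high"},  # placeholder ID
    },
    {
        "type": "image_url",
        "image_url": {"url": "https://example.com/after.png", "detail": "low"},
    },
]
# A list like this can be passed as a message's `content` in place of a plain string.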
+ """ + + url: Optional[str] = None + """ + The URL of the image, must be a supported image types: jpeg, jpg, png, gif, + webp. + """ diff --git a/src/openai/types/beta/threads/image_url_delta_block.py b/src/openai/types/beta/threads/image_url_delta_block.py new file mode 100644 index 0000000000..5252da12dd --- /dev/null +++ b/src/openai/types/beta/threads/image_url_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .image_url_delta import ImageURLDelta + +__all__ = ["ImageURLDeltaBlock"] + + +class ImageURLDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["image_url"] + """Always `image_url`.""" + + image_url: Optional[ImageURLDelta] = None diff --git a/src/openai/types/beta/threads/image_url_param.py b/src/openai/types/beta/threads/image_url_param.py new file mode 100644 index 0000000000..6b7e427edd --- /dev/null +++ b/src/openai/types/beta/threads/image_url_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ImageURLParam"] + + +class ImageURLParam(TypedDict, total=False): + url: Required[str] + """ + The external URL of the image, must be a supported image types: jpeg, jpg, png, + gif, webp. + """ + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. Default + value is `auto` + """ diff --git a/src/openai/types/beta/threads/message.py b/src/openai/types/beta/threads/message.py new file mode 100644 index 0000000000..4a05a128eb --- /dev/null +++ b/src/openai/types/beta/threads/message.py @@ -0,0 +1,103 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ...._models import BaseModel +from .message_content import MessageContent +from ...shared.metadata import Metadata +from ..code_interpreter_tool import CodeInterpreterTool + +__all__ = [ + "Message", + "Attachment", + "AttachmentTool", + "AttachmentToolAssistantToolsFileSearchTypeOnly", + "IncompleteDetails", +] + + +class AttachmentToolAssistantToolsFileSearchTypeOnly(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterTool, AttachmentToolAssistantToolsFileSearchTypeOnly] + + +class Attachment(BaseModel): + file_id: Optional[str] = None + """The ID of the file to attach to the message.""" + + tools: Optional[List[AttachmentTool]] = None + """The tools to add this file to.""" + + +class IncompleteDetails(BaseModel): + reason: Literal["content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed"] + """The reason the message is incomplete.""" + + +class Message(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + assistant_id: Optional[str] = None + """ + If applicable, the ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) that + authored this message. 
+ """ + + attachments: Optional[List[Attachment]] = None + """A list of files attached to the message, and the tools they were added to.""" + + completed_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was completed.""" + + content: List[MessageContent] + """The content of the message in array of text and/or images.""" + + created_at: int + """The Unix timestamp (in seconds) for when the message was created.""" + + incomplete_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the message was marked as incomplete.""" + + incomplete_details: Optional[IncompleteDetails] = None + """On an incomplete message, details about why the message is incomplete.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + object: Literal["thread.message"] + """The object type, which is always `thread.message`.""" + + role: Literal["user", "assistant"] + """The entity that produced the message. One of `user` or `assistant`.""" + + run_id: Optional[str] = None + """ + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) + associated with the creation of this message. Value is `null` when messages are + created manually using the create message or create thread endpoints. + """ + + status: Literal["in_progress", "incomplete", "completed"] + """ + The status of the message, which can be either `in_progress`, `incomplete`, or + `completed`. + """ + + thread_id: str + """ + The [thread](https://platform.openai.com/docs/api-reference/threads) ID that + this message belongs to. + """ diff --git a/src/openai/types/beta/threads/message_content.py b/src/openai/types/beta/threads/message_content.py new file mode 100644 index 0000000000..9523c1e1b9 --- /dev/null +++ b/src/openai/types/beta/threads/message_content.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .text_content_block import TextContentBlock +from .refusal_content_block import RefusalContentBlock +from .image_url_content_block import ImageURLContentBlock +from .image_file_content_block import ImageFileContentBlock + +__all__ = ["MessageContent"] + + +MessageContent: TypeAlias = Annotated[ + Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock, RefusalContentBlock], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/threads/message_content_delta.py b/src/openai/types/beta/threads/message_content_delta.py new file mode 100644 index 0000000000..b6e7dfa45a --- /dev/null +++ b/src/openai/types/beta/threads/message_content_delta.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
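`MessageContent` above is a discriminated union keyed on `type`, so each block in `Message.content` can be dispatched explicitly. A rough sketch of reading messages back, assuming the SDK's `client.beta.threads.messages.list` method and a placeholder thread ID:

```python
from openai import OpenAI

client = OpenAI()

messages = client.beta.threads.messages.list(thread_id="thread_abc123")

for message in messages:
    print(f"{message.role} ({message.status}):")
    for block in message.content:
        # The union is discriminated on `type`: text, image_file, image_url, refusal.
        if block.type == "text":
            print("  text:", block.text.value)
        elif block.type == "image_file":
            print("  image file:", block.image_file.file_id)
        elif block.type == "image_url":
            print("  image url:", block.image_url.url)
        elif block.type == "refusal":
            print("  refusal:", block.refusal)
```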
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .text_delta_block import TextDeltaBlock +from .refusal_delta_block import RefusalDeltaBlock +from .image_url_delta_block import ImageURLDeltaBlock +from .image_file_delta_block import ImageFileDeltaBlock + +__all__ = ["MessageContentDelta"] + +MessageContentDelta: TypeAlias = Annotated[ + Union[ImageFileDeltaBlock, TextDeltaBlock, RefusalDeltaBlock, ImageURLDeltaBlock], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/threads/message_content_image_file.py b/src/openai/types/beta/threads/message_content_image_file.py deleted file mode 100644 index eeba5a633c..0000000000 --- a/src/openai/types/beta/threads/message_content_image_file.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = ["MessageContentImageFile", "ImageFile"] - - -class ImageFile(BaseModel): - file_id: str - """ - The [File](https://platform.openai.com/docs/api-reference/files) ID of the image - in the message content. - """ - - -class MessageContentImageFile(BaseModel): - image_file: ImageFile - - type: Literal["image_file"] - """Always `image_file`.""" diff --git a/src/openai/types/beta/threads/message_content_part_param.py b/src/openai/types/beta/threads/message_content_part_param.py new file mode 100644 index 0000000000..dc09a01c27 --- /dev/null +++ b/src/openai/types/beta/threads/message_content_part_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .text_content_block_param import TextContentBlockParam +from .image_url_content_block_param import ImageURLContentBlockParam +from .image_file_content_block_param import ImageFileContentBlockParam + +__all__ = ["MessageContentPartParam"] + +MessageContentPartParam: TypeAlias = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] diff --git a/src/openai/types/beta/threads/message_content_text.py b/src/openai/types/beta/threads/message_content_text.py deleted file mode 100644 index b529a384c6..0000000000 --- a/src/openai/types/beta/threads/message_content_text.py +++ /dev/null @@ -1,74 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from typing import List, Union -from typing_extensions import Literal - -from ...._models import BaseModel - -__all__ = [ - "MessageContentText", - "Text", - "TextAnnotation", - "TextAnnotationFileCitation", - "TextAnnotationFileCitationFileCitation", - "TextAnnotationFilePath", - "TextAnnotationFilePathFilePath", -] - - -class TextAnnotationFileCitationFileCitation(BaseModel): - file_id: str - """The ID of the specific File the citation is from.""" - - quote: str - """The specific quote in the file.""" - - -class TextAnnotationFileCitation(BaseModel): - end_index: int - - file_citation: TextAnnotationFileCitationFileCitation - - start_index: int - - text: str - """The text in the message content that needs to be replaced.""" - - type: Literal["file_citation"] - """Always `file_citation`.""" - - -class TextAnnotationFilePathFilePath(BaseModel): - file_id: str - """The ID of the file that was generated.""" - - -class TextAnnotationFilePath(BaseModel): - end_index: int - - file_path: TextAnnotationFilePathFilePath - - start_index: int - - text: str - """The text in the message content that needs to be replaced.""" - - type: Literal["file_path"] - """Always `file_path`.""" - - -TextAnnotation = Union[TextAnnotationFileCitation, TextAnnotationFilePath] - - -class Text(BaseModel): - annotations: List[TextAnnotation] - - value: str - """The data that makes up the text.""" - - -class MessageContentText(BaseModel): - text: Text - - type: Literal["text"] - """Always `text`.""" diff --git a/src/openai/types/beta/threads/message_create_params.py b/src/openai/types/beta/threads/message_create_params.py index 8733f10b8a..b52386824a 100644 --- a/src/openai/types/beta/threads/message_create_params.py +++ b/src/openai/types/beta/threads/message_create_params.py @@ -1,35 +1,55 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -__all__ = ["MessageCreateParams"] +from ...shared_params.metadata import Metadata +from .message_content_part_param import MessageContentPartParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam + +__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool", "AttachmentToolFileSearch"] class MessageCreateParams(TypedDict, total=False): - content: Required[str] - """The content of the message.""" + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" - role: Required[Literal["user"]] - """The role of the entity that is creating the message. + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: - Currently only `user` is supported. + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. """ - file_ids: List[str] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the message should use. There can be a maximum of 10 files attached to a - message. Useful for tools like `retrieval` and `code_interpreter` that can - access and use files. 
- """ + attachments: Optional[Iterable[Attachment]] + """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ + + +class AttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AttachmentToolFileSearch] + + +class Attachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AttachmentTool] + """The tools to add this file to.""" diff --git a/src/openai/types/beta/threads/message_deleted.py b/src/openai/types/beta/threads/message_deleted.py new file mode 100644 index 0000000000..48210777fa --- /dev/null +++ b/src/openai/types/beta/threads/message_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["MessageDeleted"] + + +class MessageDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["thread.message.deleted"] diff --git a/src/openai/types/beta/threads/message_delta.py b/src/openai/types/beta/threads/message_delta.py new file mode 100644 index 0000000000..ecd0dfe319 --- /dev/null +++ b/src/openai/types/beta/threads/message_delta.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_content_delta import MessageContentDelta + +__all__ = ["MessageDelta"] + + +class MessageDelta(BaseModel): + content: Optional[List[MessageContentDelta]] = None + """The content of the message in array of text and/or images.""" + + role: Optional[Literal["user", "assistant"]] = None + """The entity that produced the message. One of `user` or `assistant`.""" diff --git a/src/openai/types/beta/threads/message_delta_event.py b/src/openai/types/beta/threads/message_delta_event.py new file mode 100644 index 0000000000..3811cef679 --- /dev/null +++ b/src/openai/types/beta/threads/message_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
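The reworked `MessageCreateParams` above drops `file_ids` in favour of per-file `attachments` that name the tools each file should be added to. A hedged sketch of the new shape (placeholder IDs; the create method itself lives elsewhere in the SDK):

```python
from openai import OpenAI

client = OpenAI()

message = client.beta.threads.messages.create(
    thread_id="thread_abc123",
    role="user",
    content="Summarize the attached report against the raw data.",
    # Each attachment pairs a file with the tools that should be able to use it.
    attachments=[
        {"file_id": "file-report123", "tools": [{"type": "file_search"}]},
        {"file_id": "file-data456", "tools": [{"type": "code_interpreter"}]},
    ],
    metadata={"source": "docs-example"},
)
print(message.attachments)
```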
+ +from typing_extensions import Literal + +from ...._models import BaseModel +from .message_delta import MessageDelta + +__all__ = ["MessageDeltaEvent"] + + +class MessageDeltaEvent(BaseModel): + id: str + """The identifier of the message, which can be referenced in API endpoints.""" + + delta: MessageDelta + """The delta containing the fields that have changed on the Message.""" + + object: Literal["thread.message.delta"] + """The object type, which is always `thread.message.delta`.""" diff --git a/src/openai/types/beta/threads/message_list_params.py b/src/openai/types/beta/threads/message_list_params.py index 31e407bb22..a7c22a66fb 100644 --- a/src/openai/types/beta/threads/message_list_params.py +++ b/src/openai/types/beta/threads/message_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -21,7 +21,7 @@ class MessageListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ @@ -37,3 +37,6 @@ class MessageListParams(TypedDict, total=False): `asc` for ascending order and `desc` for descending order. """ + + run_id: str + """Filter messages by the run ID that generated them.""" diff --git a/src/openai/types/beta/threads/message_update_params.py b/src/openai/types/beta/threads/message_update_params.py index 2e3e1b4b1a..bb078281e6 100644 --- a/src/openai/types/beta/threads/message_update_params.py +++ b/src/openai/types/beta/threads/message_update_params.py @@ -1,20 +1,24 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["MessageUpdateParams"] class MessageUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/threads/messages/__init__.py b/src/openai/types/beta/threads/messages/__init__.py deleted file mode 100644 index 6046f68204..0000000000 --- a/src/openai/types/beta/threads/messages/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
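`MessageListParams` gains a `run_id` filter, which pairs naturally with the existing cursor pagination. A small sketch, assuming placeholder thread/run IDs and the SDK's messages list method:

```python
from openai import OpenAI

client = OpenAI()

# Only messages produced by one specific run, newest first.
page = client.beta.threads.messages.list(
    thread_id="thread_abc123",
    run_id="run_abc123",
    order="desc",
    limit=20,
)
for message in page:
    print(message.id, message.role)
```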
- -from __future__ import annotations - -from .message_file import MessageFile as MessageFile -from .file_list_params import FileListParams as FileListParams diff --git a/src/openai/types/beta/threads/messages/message_file.py b/src/openai/types/beta/threads/messages/message_file.py deleted file mode 100644 index 5332dee962..0000000000 --- a/src/openai/types/beta/threads/messages/message_file.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from ....._models import BaseModel - -__all__ = ["MessageFile"] - - -class MessageFile(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - created_at: int - """The Unix timestamp (in seconds) for when the message file was created.""" - - message_id: str - """ - The ID of the [message](https://platform.openai.com/docs/api-reference/messages) - that the [File](https://platform.openai.com/docs/api-reference/files) is - attached to. - """ - - object: Literal["thread.message.file"] - """The object type, which is always `thread.message.file`.""" diff --git a/src/openai/types/beta/threads/refusal_content_block.py b/src/openai/types/beta/threads/refusal_content_block.py new file mode 100644 index 0000000000..d54f948554 --- /dev/null +++ b/src/openai/types/beta/threads/refusal_content_block.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalContentBlock"] + + +class RefusalContentBlock(BaseModel): + refusal: str + + type: Literal["refusal"] + """Always `refusal`.""" diff --git a/src/openai/types/beta/threads/refusal_delta_block.py b/src/openai/types/beta/threads/refusal_delta_block.py new file mode 100644 index 0000000000..dbd8e62697 --- /dev/null +++ b/src/openai/types/beta/threads/refusal_delta_block.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalDeltaBlock"] + + +class RefusalDeltaBlock(BaseModel): + index: int + """The index of the refusal part in the message.""" + + type: Literal["refusal"] + """Always `refusal`.""" + + refusal: Optional[str] = None diff --git a/src/openai/types/beta/threads/required_action_function_tool_call.py b/src/openai/types/beta/threads/required_action_function_tool_call.py index 0284d0f188..a24dfd068b 100644 --- a/src/openai/types/beta/threads/required_action_function_tool_call.py +++ b/src/openai/types/beta/threads/required_action_function_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index ffbba1e504..da9418d6f9 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -1,28 +1,39 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-import builtins -from typing import List, Union, Optional +from typing import List, Optional from typing_extensions import Literal -from ...shared import FunctionDefinition from ...._models import BaseModel +from .run_status import RunStatus +from ..assistant_tool import AssistantTool +from ...shared.metadata import Metadata +from ..assistant_tool_choice_option import AssistantToolChoiceOption +from ..assistant_response_format_option import AssistantResponseFormatOption from .required_action_function_tool_call import RequiredActionFunctionToolCall __all__ = [ "Run", + "IncompleteDetails", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "TruncationStrategy", + "Usage", ] +class IncompleteDetails(BaseModel): + reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None + """The reason why the run is incomplete. + + This will point to which specific token limit was reached over the course of the + run. + """ + + class LastError(BaseModel): - code: Literal["server_error", "rate_limit_exceeded"] - """One of `server_error` or `rate_limit_exceeded`.""" + code: Literal["server_error", "rate_limit_exceeded", "invalid_prompt"] + """One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.""" message: str """A human-readable description of the error.""" @@ -41,24 +52,32 @@ class RequiredAction(BaseModel): """For now, this is always `submit_tool_outputs`.""" -class ToolAssistantToolsCode(BaseModel): - type: Literal["code_interpreter"] - """The type of tool being defined: `code_interpreter`""" - +class TruncationStrategy(BaseModel): + type: Literal["auto", "last_messages"] + """The truncation strategy to use for the thread. -class ToolAssistantToolsRetrieval(BaseModel): - type: Literal["retrieval"] - """The type of tool being defined: `retrieval`""" + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + last_messages: Optional[int] = None + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ -class ToolAssistantToolsFunction(BaseModel): - function: FunctionDefinition - type: Literal["function"] - """The type of tool being defined: `function`""" +class Usage(BaseModel): + completion_tokens: int + """Number of completion tokens used over the course of the run.""" + prompt_tokens: int + """Number of prompt tokens used over the course of the run.""" -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] + total_tokens: int + """Total number of tokens used (prompt + completion).""" class Run(BaseModel): @@ -72,26 +91,25 @@ class Run(BaseModel): execution of this run. 
""" - cancelled_at: Optional[int] + cancelled_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run was cancelled.""" - completed_at: Optional[int] + completed_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run was completed.""" created_at: int """The Unix timestamp (in seconds) for when the run was created.""" - expires_at: int + expires_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run will expire.""" - failed_at: Optional[int] + failed_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run failed.""" - file_ids: List[str] - """ - The list of [File](https://platform.openai.com/docs/api-reference/files) IDs the - [assistant](https://platform.openai.com/docs/api-reference/assistants) used for - this run. + incomplete_details: Optional[IncompleteDetails] = None + """Details on why the run is incomplete. + + Will be `null` if the run is not incomplete. """ instructions: str @@ -101,15 +119,29 @@ class Run(BaseModel): this run. """ - last_error: Optional[LastError] + last_error: Optional[LastError] = None """The last error associated with this run. Will be `null` if there are no errors.""" - metadata: Optional[builtins.object] + max_completion_tokens: Optional[int] = None + """ + The maximum number of completion tokens specified to have been used over the + course of the run. + """ + + max_prompt_tokens: Optional[int] = None + """ + The maximum number of prompt tokens specified to have been used over the course + of the run. + """ + + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -122,22 +154,51 @@ class Run(BaseModel): object: Literal["thread.run"] """The object type, which is always `thread.run`.""" - required_action: Optional[RequiredAction] + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + required_action: Optional[RequiredAction] = None """Details on the action required to continue the run. Will be `null` if no action is required. """ - started_at: Optional[int] + response_format: Optional[AssistantResponseFormatOption] = None + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. 
Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + started_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run was started.""" - status: Literal[ - "queued", "in_progress", "requires_action", "cancelling", "cancelled", "failed", "completed", "expired" - ] + status: RunStatus """ The status of the run, which can be either `queued`, `in_progress`, - `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or - `expired`. + `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, + `incomplete`, or `expired`. """ thread_id: str @@ -146,9 +207,39 @@ class Run(BaseModel): that was executed on as a part of this run. """ - tools: List[Tool] + tool_choice: Optional[AssistantToolChoiceOption] = None + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: List[AssistantTool] """ The list of tools that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run. """ + + truncation_strategy: Optional[TruncationStrategy] = None + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + usage: Optional[Usage] = None + """Usage statistics related to the run. + + This value will be `null` if the run is not in a terminal state (i.e. + `in_progress`, `queued`, etc.). + """ + + temperature: Optional[float] = None + """The sampling temperature used for this run. If not set, defaults to 1.""" + + top_p: Optional[float] = None + """The nucleus sampling value used for this run. If not set, defaults to 1.""" diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index df92f4fd2c..fc70227862 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -1,22 +1,33 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
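The expanded `Run` model above surfaces token usage, incomplete details, and the new `incomplete` status. A sketch of inspecting a finished run, assuming the SDK's `client.beta.threads.runs.retrieve` method and placeholder IDs:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.retrieve(run_id="run_abc123", thread_id="thread_abc123")

print("status:", run.status)  # queued / in_progress / ... / incomplete / expired

if run.status == "incomplete" and run.incomplete_details:
    # Points at the token limit that was hit: max_completion_tokens or max_prompt_tokens.
    print("incomplete because:", run.incomplete_details.reason)

if run.last_error:
    print("error:", run.last_error.code, "-", run.last_error.message)

if run.usage:  # only populated once the run reaches a terminal state
    print("tokens:", run.usage.prompt_tokens, "+", run.usage.completion_tokens,
          "=", run.usage.total_tokens)
```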
from __future__ import annotations -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ....types import shared_params +from ...shared.chat_model import ChatModel +from ..assistant_tool_param import AssistantToolParam +from .runs.run_step_include import RunStepInclude +from ...shared_params.metadata import Metadata +from ...shared.reasoning_effort import ReasoningEffort +from .message_content_part_param import MessageContentPartParam +from ..code_interpreter_tool_param import CodeInterpreterToolParam +from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam +from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ - "RunCreateParams", - "Tool", - "ToolAssistantToolsCode", - "ToolAssistantToolsRetrieval", - "ToolAssistantToolsFunction", + "RunCreateParamsBase", + "AdditionalMessage", + "AdditionalMessageAttachment", + "AdditionalMessageAttachmentTool", + "AdditionalMessageAttachmentToolFileSearch", + "TruncationStrategy", + "RunCreateParamsNonStreaming", + "RunCreateParamsStreaming", ] -class RunCreateParams(TypedDict, total=False): +class RunCreateParamsBase(TypedDict, total=False): assistant_id: Required[str] """ The ID of the @@ -24,21 +35,64 @@ class RunCreateParams(TypedDict, total=False): execute this run. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + additional_instructions: Optional[str] + """Appends additional instructions at the end of the instructions for the run. + + This is useful for modifying the behavior on a per-run basis without overriding + other instructions. + """ + + additional_messages: Optional[Iterable[AdditionalMessage]] + """Adds additional messages to the thread before creating the run.""" + instructions: Optional[str] - """Override the default system message of the assistant. + """ + Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + """ - This is useful for modifying the behavior on a per-run basis. + max_completion_tokens: Optional[int] + """ + The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + """ + + max_prompt_tokens: Optional[int] + """The maximum number of prompt tokens that may be used over the course of the run. + + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. 
This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ - model: Optional[str] + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -46,28 +100,162 @@ class RunCreateParams(TypedDict, total=False): assistant will be used. """ - tools: Optional[List[Tool]] + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + response_format: Optional[AssistantResponseFormatOptionParam] + """Specifies the format that the model must output. + + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + """ + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. + """ + + tool_choice: Optional[AssistantToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + """ + + tools: Optional[Iterable[AssistantToolParam]] """Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. 
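`RunCreateParamsBase` lets most assistant settings be overridden per run. A hedged sketch of a run that narrows the context window and caps token usage, assuming the SDK's `client.beta.threads.runs.create` method and placeholder IDs:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    additional_instructions="Answer in one short paragraph.",
    # Per-run overrides; the assistant's own defaults apply to anything omitted.
    temperature=0.2,
    max_prompt_tokens=2000,
    max_completion_tokens=500,
    truncation_strategy={"type": "last_messages", "last_messages": 10},
    metadata={"ticket": "12345"},
)
print(run.id, run.status)
```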
""" + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + """ + + truncation_strategy: Optional[TruncationStrategy] + """Controls for how a thread will be truncated prior to the run. + + Use this to control the intial context window of the run. + """ + + +class AdditionalMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AdditionalMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AdditionalMessageAttachmentToolFileSearch] + + +class AdditionalMessageAttachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AdditionalMessageAttachmentTool] + """The tools to add this file to.""" + + +class AdditionalMessage(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageContentPartParam]]] + """The text contents of the message.""" + + role: Required[Literal["user", "assistant"]] + """The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in + most cases to represent user-generated messages. + - `assistant`: Indicates the message is generated by the assistant. Use this + value to insert messages from the assistant into the conversation. + """ -class ToolAssistantToolsCode(TypedDict, total=False): - type: Required[Literal["code_interpreter"]] - """The type of tool being defined: `code_interpreter`""" + attachments: Optional[Iterable[AdditionalMessageAttachment]] + """A list of files attached to the message, and the tools they should be added to.""" + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. -class ToolAssistantToolsRetrieval(TypedDict, total=False): - type: Required[Literal["retrieval"]] - """The type of tool being defined: `retrieval`""" + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ -class ToolAssistantToolsFunction(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] +class TruncationStrategy(TypedDict, total=False): + type: Required[Literal["auto", "last_messages"]] + """The truncation strategy to use for the thread. - type: Required[Literal["function"]] - """The type of tool being defined: `function`""" + The default is `auto`. If set to `last_messages`, the thread will be truncated + to the n most recent messages in the thread. When set to `auto`, messages in the + middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + """ + + last_messages: Optional[int] + """ + The number of most recent messages from the thread when constructing the context + for the run. + """ + + +class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. 
+ """ + + +class RunCreateParamsStreaming(RunCreateParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ -Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction] +RunCreateParams = Union[RunCreateParamsNonStreaming, RunCreateParamsStreaming] diff --git a/src/openai/types/beta/threads/run_list_params.py b/src/openai/types/beta/threads/run_list_params.py index 5f41347718..fbea54f6f2 100644 --- a/src/openai/types/beta/threads/run_list_params.py +++ b/src/openai/types/beta/threads/run_list_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -21,7 +21,7 @@ class RunListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/threads/run_status.py b/src/openai/types/beta/threads/run_status.py new file mode 100644 index 0000000000..47c7cbd007 --- /dev/null +++ b/src/openai/types/beta/threads/run_status.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["RunStatus"] + +RunStatus: TypeAlias = Literal[ + "queued", + "in_progress", + "requires_action", + "cancelling", + "cancelled", + "failed", + "completed", + "incomplete", + "expired", +] diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index a960f0f06f..147728603a 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -1,17 +1,22 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List -from typing_extensions import Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict -__all__ = ["RunSubmitToolOutputsParams", "ToolOutput"] +__all__ = [ + "RunSubmitToolOutputsParamsBase", + "ToolOutput", + "RunSubmitToolOutputsParamsNonStreaming", + "RunSubmitToolOutputsParamsStreaming", +] -class RunSubmitToolOutputsParams(TypedDict, total=False): +class RunSubmitToolOutputsParamsBase(TypedDict, total=False): thread_id: Required[str] - tool_outputs: Required[List[ToolOutput]] + tool_outputs: Required[Iterable[ToolOutput]] """A list of tools for which the outputs are being submitted.""" @@ -24,3 +29,24 @@ class ToolOutput(TypedDict, total=False): The ID of the tool call in the `required_action` object within the run object the output is being submitted for. 
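`RunSubmitToolOutputsParams` gets the same streaming split, shown below. A minimal sketch of answering a `requires_action` run, assuming the SDK's `submit_tool_outputs` method and a stand-in `execute_tool` helper (both outside this hunk):

```python
import json

from openai import OpenAI

client = OpenAI()
thread_id, run_id = "thread_abc123", "run_abc123"


def execute_tool(name: str, args: dict) -> dict:
    # Stand-in for your own tool dispatch; must return something JSON-serializable.
    return {"tool": name, "echo": args}


run = client.beta.threads.runs.retrieve(run_id=run_id, thread_id=thread_id)

if run.status == "requires_action" and run.required_action:
    tool_outputs = []
    for call in run.required_action.submit_tool_outputs.tool_calls:
        result = execute_tool(call.function.name, json.loads(call.function.arguments))
        tool_outputs.append({"tool_call_id": call.id, "output": json.dumps(result)})

    # Pass stream=True instead to get the streaming variant defined below.
    run = client.beta.threads.runs.submit_tool_outputs(
        run_id=run_id,
        thread_id=thread_id,
        tool_outputs=tool_outputs,
    )
    print(run.status)
```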
""" + + +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +class RunSubmitToolOutputsParamsStreaming(RunSubmitToolOutputsParamsBase): + stream: Required[Literal[True]] + """ + If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + """ + + +RunSubmitToolOutputsParams = Union[RunSubmitToolOutputsParamsNonStreaming, RunSubmitToolOutputsParamsStreaming] diff --git a/src/openai/types/beta/threads/run_update_params.py b/src/openai/types/beta/threads/run_update_params.py index 09f81aa003..fbcbd3fb14 100644 --- a/src/openai/types/beta/threads/run_update_params.py +++ b/src/openai/types/beta/threads/run_update_params.py @@ -1,20 +1,24 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["RunUpdateParams"] class RunUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/threads/runs/__init__.py b/src/openai/types/beta/threads/runs/__init__.py index 72b972a986..467d5d793d 100644 --- a/src/openai/types/beta/threads/runs/__init__.py +++ b/src/openai/types/beta/threads/runs/__init__.py @@ -1,13 +1,24 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations from .run_step import RunStep as RunStep -from .code_tool_call import CodeToolCall as CodeToolCall +from .tool_call import ToolCall as ToolCall +from .run_step_delta import RunStepDelta as RunStepDelta +from .tool_call_delta import ToolCallDelta as ToolCallDelta +from .run_step_include import RunStepInclude as RunStepInclude from .step_list_params import StepListParams as StepListParams from .function_tool_call import FunctionToolCall as FunctionToolCall -from .retrieval_tool_call import RetrievalToolCall as RetrievalToolCall +from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent +from .step_retrieve_params import StepRetrieveParams as StepRetrieveParams +from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs +from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall +from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject from .tool_calls_step_details import ToolCallsStepDetails as ToolCallsStepDetails -from .message_creation_step_details import ( - MessageCreationStepDetails as MessageCreationStepDetails, -) +from .function_tool_call_delta import FunctionToolCallDelta as FunctionToolCallDelta +from .code_interpreter_tool_call import CodeInterpreterToolCall as CodeInterpreterToolCall +from .file_search_tool_call_delta import FileSearchToolCallDelta as FileSearchToolCallDelta +from .run_step_delta_message_delta import RunStepDeltaMessageDelta as RunStepDeltaMessageDelta +from .code_interpreter_output_image import CodeInterpreterOutputImage as CodeInterpreterOutputImage +from .message_creation_step_details import MessageCreationStepDetails as MessageCreationStepDetails +from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta as CodeInterpreterToolCallDelta diff --git a/src/openai/types/beta/threads/runs/code_interpreter_logs.py b/src/openai/types/beta/threads/runs/code_interpreter_logs.py new file mode 100644 index 0000000000..0bf8c1dac2 --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_logs.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterLogs"] + + +class CodeInterpreterLogs(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["logs"] + """Always `logs`.""" + + logs: Optional[str] = None + """The text output from the Code Interpreter tool call.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_output_image.py b/src/openai/types/beta/threads/runs/code_interpreter_output_image.py new file mode 100644 index 0000000000..2257f37e41 --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_output_image.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["CodeInterpreterOutputImage", "Image"] + + +class Image(BaseModel): + file_id: Optional[str] = None + """ + The [file](https://platform.openai.com/docs/api-reference/files) ID of the + image. 
+ """ + + +class CodeInterpreterOutputImage(BaseModel): + index: int + """The index of the output in the outputs array.""" + + type: Literal["image"] + """Always `image`.""" + + image: Optional[Image] = None diff --git a/src/openai/types/beta/threads/runs/code_tool_call.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py similarity index 77% rename from src/openai/types/beta/threads/runs/code_tool_call.py rename to src/openai/types/beta/threads/runs/code_interpreter_tool_call.py index f808005ecb..e7df4e19c4 100644 --- a/src/openai/types/beta/threads/runs/code_tool_call.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py @@ -1,12 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union -from typing_extensions import Literal +from typing_extensions import Literal, Annotated, TypeAlias +from ....._utils import PropertyInfo from ....._models import BaseModel __all__ = [ - "CodeToolCall", + "CodeInterpreterToolCall", "CodeInterpreter", "CodeInterpreterOutput", "CodeInterpreterOutputLogs", @@ -38,7 +39,9 @@ class CodeInterpreterOutputImage(BaseModel): """Always `image`.""" -CodeInterpreterOutput = Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage] +CodeInterpreterOutput: TypeAlias = Annotated[ + Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") +] class CodeInterpreter(BaseModel): @@ -53,7 +56,7 @@ class CodeInterpreter(BaseModel): """ -class CodeToolCall(BaseModel): +class CodeInterpreterToolCall(BaseModel): id: str """The ID of the tool call.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py new file mode 100644 index 0000000000..9d7a1563cd --- /dev/null +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .code_interpreter_logs import CodeInterpreterLogs +from .code_interpreter_output_image import CodeInterpreterOutputImage + +__all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"] + +CodeInterpreterOutput: TypeAlias = Annotated[ + Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") +] + + +class CodeInterpreter(BaseModel): + input: Optional[str] = None + """The input to the Code Interpreter tool call.""" + + outputs: Optional[List[CodeInterpreterOutput]] = None + """The outputs from the Code Interpreter tool call. + + Code Interpreter can output one or more items, including text (`logs`) or images + (`image`). Each of these are represented by a different object type. + """ + + +class CodeInterpreterToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["code_interpreter"] + """The type of tool call. + + This is always going to be `code_interpreter` for this type of tool call. 
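The renamed `CodeInterpreterToolCall` (and its delta counterpart above) carry the tool's `input` plus a list of `logs`/`image` outputs. A sketch of walking completed run steps and printing Code Interpreter activity, assuming the SDK's `runs.steps.list` method and placeholder IDs:

```python
from openai import OpenAI

client = OpenAI()

steps = client.beta.threads.runs.steps.list(run_id="run_abc123", thread_id="thread_abc123")

for step in steps:
    if step.type != "tool_calls":
        continue
    for call in step.step_details.tool_calls:
        if call.type != "code_interpreter":
            continue
        print("code:", call.code_interpreter.input)
        for output in call.code_interpreter.outputs:
            if output.type == "logs":
                print("logs:", output.logs)
            elif output.type == "image":
                print("image file:", output.image.file_id)
```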
+ """ + + id: Optional[str] = None + """The ID of the tool call.""" + + code_interpreter: Optional[CodeInterpreter] = None + """The Code Interpreter tool call definition.""" diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call.py b/src/openai/types/beta/threads/runs/file_search_tool_call.py new file mode 100644 index 0000000000..a2068daad1 --- /dev/null +++ b/src/openai/types/beta/threads/runs/file_search_tool_call.py @@ -0,0 +1,78 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = [ + "FileSearchToolCall", + "FileSearch", + "FileSearchRankingOptions", + "FileSearchResult", + "FileSearchResultContent", +] + + +class FileSearchRankingOptions(BaseModel): + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + +class FileSearchResultContent(BaseModel): + text: Optional[str] = None + """The text content of the file.""" + + type: Optional[Literal["text"]] = None + """The type of the content.""" + + +class FileSearchResult(BaseModel): + file_id: str + """The ID of the file that result was found in.""" + + file_name: str + """The name of the file that result was found in.""" + + score: float + """The score of the result. + + All values must be a floating point number between 0 and 1. + """ + + content: Optional[List[FileSearchResultContent]] = None + """The content of the result that was found. + + The content is only included if requested via the include query parameter. + """ + + +class FileSearch(BaseModel): + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search.""" + + results: Optional[List[FileSearchResult]] = None + """The results of the file search.""" + + +class FileSearchToolCall(BaseModel): + id: str + """The ID of the tool call object.""" + + file_search: FileSearch + """For now, this is always going to be an empty object.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. + """ diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py b/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py new file mode 100644 index 0000000000..df5ac217dc --- /dev/null +++ b/src/openai/types/beta/threads/runs/file_search_tool_call_delta.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FileSearchToolCallDelta"] + + +class FileSearchToolCallDelta(BaseModel): + file_search: object + """For now, this is always going to be an empty object.""" + + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["file_search"] + """The type of tool call. + + This is always going to be `file_search` for this type of tool call. 
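`FileSearchToolCall` now exposes ranking options and scored results, and result `content` is only populated when requested via the `include` query parameter. A hedged sketch, assuming the steps list method accepts the `RunStepInclude` values defined later in this diff:

```python
from openai import OpenAI

client = OpenAI()

steps = client.beta.threads.runs.steps.list(
    run_id="run_abc123",
    thread_id="thread_abc123",
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)

for step in steps:
    if step.type != "tool_calls":
        continue
    for call in step.step_details.tool_calls:
        if call.type != "file_search" or not call.file_search.results:
            continue
        for result in call.file_search.results:
            print(f"{result.file_name} (score {result.score:.2f})")
            for part in result.content or []:
                if part.type == "text":
                    print("   ", part.text)
```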
+ """ + + id: Optional[str] = None + """The ID of the tool call object.""" diff --git a/src/openai/types/beta/threads/runs/function_tool_call.py b/src/openai/types/beta/threads/runs/function_tool_call.py index f4cf8bbdd0..b1d354f894 100644 --- a/src/openai/types/beta/threads/runs/function_tool_call.py +++ b/src/openai/types/beta/threads/runs/function_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional from typing_extensions import Literal @@ -15,7 +15,7 @@ class Function(BaseModel): name: str """The name of the function.""" - output: Optional[str] + output: Optional[str] = None """The output of the function. This will be `null` if the outputs have not been diff --git a/src/openai/types/beta/threads/runs/function_tool_call_delta.py b/src/openai/types/beta/threads/runs/function_tool_call_delta.py new file mode 100644 index 0000000000..faaf026f7f --- /dev/null +++ b/src/openai/types/beta/threads/runs/function_tool_call_delta.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["FunctionToolCallDelta", "Function"] + + +class Function(BaseModel): + arguments: Optional[str] = None + """The arguments passed to the function.""" + + name: Optional[str] = None + """The name of the function.""" + + output: Optional[str] = None + """The output of the function. + + This will be `null` if the outputs have not been + [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + yet. + """ + + +class FunctionToolCallDelta(BaseModel): + index: int + """The index of the tool call in the tool calls array.""" + + type: Literal["function"] + """The type of tool call. + + This is always going to be `function` for this type of tool call. + """ + + id: Optional[str] = None + """The ID of the tool call object.""" + + function: Optional[Function] = None + """The definition of the function that was called.""" diff --git a/src/openai/types/beta/threads/runs/message_creation_step_details.py b/src/openai/types/beta/threads/runs/message_creation_step_details.py index 29f9106ec0..73439079d3 100644 --- a/src/openai/types/beta/threads/runs/message_creation_step_details.py +++ b/src/openai/types/beta/threads/runs/message_creation_step_details.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal @@ -16,4 +16,4 @@ class MessageCreationStepDetails(BaseModel): message_creation: MessageCreation type: Literal["message_creation"] - """Always `message_creation``.""" + """Always `message_creation`.""" diff --git a/src/openai/types/beta/threads/runs/retrieval_tool_call.py b/src/openai/types/beta/threads/runs/retrieval_tool_call.py deleted file mode 100644 index 6cdbcdd93f..0000000000 --- a/src/openai/types/beta/threads/runs/retrieval_tool_call.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from typing_extensions import Literal - -from ....._models import BaseModel - -__all__ = ["RetrievalToolCall"] - - -class RetrievalToolCall(BaseModel): - id: str - """The ID of the tool call object.""" - - retrieval: object - """For now, this is always going to be an empty object.""" - - type: Literal["retrieval"] - """The type of tool call. - - This is always going to be `retrieval` for this type of tool call. - """ diff --git a/src/openai/types/beta/threads/runs/run_step.py b/src/openai/types/beta/threads/runs/run_step.py index 536cf04ab1..b5f380c7b1 100644 --- a/src/openai/types/beta/threads/runs/run_step.py +++ b/src/openai/types/beta/threads/runs/run_step.py @@ -1,14 +1,15 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import Union, Optional -from typing_extensions import Literal +from typing_extensions import Literal, Annotated, TypeAlias +from ....._utils import PropertyInfo from ....._models import BaseModel +from ....shared.metadata import Metadata from .tool_calls_step_details import ToolCallsStepDetails from .message_creation_step_details import MessageCreationStepDetails -__all__ = ["RunStep", "LastError", "StepDetails"] +__all__ = ["RunStep", "LastError", "StepDetails", "Usage"] class LastError(BaseModel): @@ -19,7 +20,20 @@ class LastError(BaseModel): """A human-readable description of the error.""" -StepDetails = Union[MessageCreationStepDetails, ToolCallsStepDetails] +StepDetails: TypeAlias = Annotated[ + Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type") +] + + +class Usage(BaseModel): + completion_tokens: int + """Number of completion tokens used over the course of the run step.""" + + prompt_tokens: int + """Number of prompt tokens used over the course of the run step.""" + + total_tokens: int + """Total number of tokens used (prompt + completion).""" class RunStep(BaseModel): @@ -33,40 +47,42 @@ class RunStep(BaseModel): associated with the run step. """ - cancelled_at: Optional[int] + cancelled_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run step was cancelled.""" - completed_at: Optional[int] + completed_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run step completed.""" created_at: int """The Unix timestamp (in seconds) for when the run step was created.""" - expired_at: Optional[int] + expired_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run step expired. A step is considered expired if the parent run is expired. """ - failed_at: Optional[int] + failed_at: Optional[int] = None """The Unix timestamp (in seconds) for when the run step failed.""" - last_error: Optional[LastError] + last_error: Optional[LastError] = None """The last error associated with this run step. Will be `null` if there are no errors. """ - metadata: Optional[builtins.object] + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" object: Literal["thread.run.step"] - """The object type, which is always `thread.run.step``.""" + """The object type, which is always `thread.run.step`.""" run_id: str """ @@ -91,3 +107,9 @@ class RunStep(BaseModel): type: Literal["message_creation", "tool_calls"] """The type of run step, which can be either `message_creation` or `tool_calls`.""" + + usage: Optional[Usage] = None + """Usage statistics related to the run step. + + This value will be `null` while the run step's status is `in_progress`. + """ diff --git a/src/openai/types/beta/threads/runs/run_step_delta.py b/src/openai/types/beta/threads/runs/run_step_delta.py new file mode 100644 index 0000000000..1139088fb4 --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Annotated, TypeAlias + +from ....._utils import PropertyInfo +from ....._models import BaseModel +from .tool_call_delta_object import ToolCallDeltaObject +from .run_step_delta_message_delta import RunStepDeltaMessageDelta + +__all__ = ["RunStepDelta", "StepDetails"] + +StepDetails: TypeAlias = Annotated[ + Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type") +] + + +class RunStepDelta(BaseModel): + step_details: Optional[StepDetails] = None + """The details of the run step.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_event.py b/src/openai/types/beta/threads/runs/run_step_delta_event.py new file mode 100644 index 0000000000..7f3f92aabf --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ....._models import BaseModel +from .run_step_delta import RunStepDelta + +__all__ = ["RunStepDeltaEvent"] + + +class RunStepDeltaEvent(BaseModel): + id: str + """The identifier of the run step, which can be referenced in API endpoints.""" + + delta: RunStepDelta + """The delta containing the fields that have changed on the run step.""" + + object: Literal["thread.run.step.delta"] + """The object type, which is always `thread.run.step.delta`.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py new file mode 100644 index 0000000000..f58ed3d96d --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ....._models import BaseModel + +__all__ = ["RunStepDeltaMessageDelta", "MessageCreation"] + + +class MessageCreation(BaseModel): + message_id: Optional[str] = None + """The ID of the message that was created by this run step.""" + + +class RunStepDeltaMessageDelta(BaseModel): + type: Literal["message_creation"] + """Always `message_creation`.""" + + message_creation: Optional[MessageCreation] = None diff --git a/src/openai/types/beta/threads/runs/run_step_include.py b/src/openai/types/beta/threads/runs/run_step_include.py new file mode 100644 index 0000000000..8e76c1b716 --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_include.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["RunStepInclude"] + +RunStepInclude: TypeAlias = Literal["step_details.tool_calls[*].file_search.results[*].content"] diff --git a/src/openai/types/beta/threads/runs/step_list_params.py b/src/openai/types/beta/threads/runs/step_list_params.py index 9c7b6c64d0..a6be771d9f 100644 --- a/src/openai/types/beta/threads/runs/step_list_params.py +++ b/src/openai/types/beta/threads/runs/step_list_params.py @@ -1,9 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +from typing import List from typing_extensions import Literal, Required, TypedDict +from .run_step_include import RunStepInclude + __all__ = ["StepListParams"] @@ -23,11 +26,23 @@ class StepListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + limit: int """A limit on the number of objects to be returned. diff --git a/src/openai/types/beta/threads/runs/step_retrieve_params.py b/src/openai/types/beta/threads/runs/step_retrieve_params.py new file mode 100644 index 0000000000..ecbb72edbd --- /dev/null +++ b/src/openai/types/beta/threads/runs/step_retrieve_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +from .run_step_include import RunStepInclude + +__all__ = ["StepRetrieveParams"] + + +class StepRetrieveParams(TypedDict, total=False): + thread_id: Required[str] + + run_id: Required[str] + + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ diff --git a/src/openai/types/beta/threads/runs/tool_call.py b/src/openai/types/beta/threads/runs/tool_call.py new file mode 100644 index 0000000000..565e3109be --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
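A minimal sketch of the new `include` parameter shape defined above; the thread and run IDs are placeholders. The same list can be passed when listing or retrieving run steps to have file search result content populated.

from openai.types.beta.threads.runs.step_retrieve_params import StepRetrieveParams

# Placeholder IDs; only the `include` value is meaningful here.
params: StepRetrieveParams = {
    "thread_id": "thread_abc123",
    "run_id": "run_abc123",
    "include": ["step_details.tool_calls[*].file_search.results[*].content"],
}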
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ....._utils import PropertyInfo +from .function_tool_call import FunctionToolCall +from .file_search_tool_call import FileSearchToolCall +from .code_interpreter_tool_call import CodeInterpreterToolCall + +__all__ = ["ToolCall"] + +ToolCall: TypeAlias = Annotated[ + Union[CodeInterpreterToolCall, FileSearchToolCall, FunctionToolCall], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta.py b/src/openai/types/beta/threads/runs/tool_call_delta.py new file mode 100644 index 0000000000..f0b8070c97 --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call_delta.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ....._utils import PropertyInfo +from .function_tool_call_delta import FunctionToolCallDelta +from .file_search_tool_call_delta import FileSearchToolCallDelta +from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta + +__all__ = ["ToolCallDelta"] + +ToolCallDelta: TypeAlias = Annotated[ + Union[CodeInterpreterToolCallDelta, FileSearchToolCallDelta, FunctionToolCallDelta], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta_object.py b/src/openai/types/beta/threads/runs/tool_call_delta_object.py new file mode 100644 index 0000000000..189dce772c --- /dev/null +++ b/src/openai/types/beta/threads/runs/tool_call_delta_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ....._models import BaseModel +from .tool_call_delta import ToolCallDelta + +__all__ = ["ToolCallDeltaObject"] + + +class ToolCallDeltaObject(BaseModel): + type: Literal["tool_calls"] + """Always `tool_calls`.""" + + tool_calls: Optional[List[ToolCallDelta]] = None + """An array of tool calls the run step was involved in. + + These can be associated with one of three types of tools: `code_interpreter`, + `file_search`, or `function`. + """ diff --git a/src/openai/types/beta/threads/runs/tool_calls_step_details.py b/src/openai/types/beta/threads/runs/tool_calls_step_details.py index 80eb90bf66..a084d387c7 100644 --- a/src/openai/types/beta/threads/runs/tool_calls_step_details.py +++ b/src/openai/types/beta/threads/runs/tool_calls_step_details.py @@ -1,16 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union +from typing import List from typing_extensions import Literal +from .tool_call import ToolCall from ....._models import BaseModel -from .code_tool_call import CodeToolCall -from .function_tool_call import FunctionToolCall -from .retrieval_tool_call import RetrievalToolCall -__all__ = ["ToolCallsStepDetails", "ToolCall"] - -ToolCall = Union[CodeToolCall, RetrievalToolCall, FunctionToolCall] +__all__ = ["ToolCallsStepDetails"] class ToolCallsStepDetails(BaseModel): @@ -18,7 +14,7 @@ class ToolCallsStepDetails(BaseModel): """An array of tool calls the run step was involved in. These can be associated with one of three types of tools: `code_interpreter`, - `retrieval`, or `function`. + `file_search`, or `function`. 
""" type: Literal["tool_calls"] diff --git a/src/openai/types/beta/threads/text.py b/src/openai/types/beta/threads/text.py new file mode 100644 index 0000000000..853bec2955 --- /dev/null +++ b/src/openai/types/beta/threads/text.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ...._models import BaseModel +from .annotation import Annotation + +__all__ = ["Text"] + + +class Text(BaseModel): + annotations: List[Annotation] + + value: str + """The data that makes up the text.""" diff --git a/src/openai/types/beta/threads/text_content_block.py b/src/openai/types/beta/threads/text_content_block.py new file mode 100644 index 0000000000..3706d6b9d8 --- /dev/null +++ b/src/openai/types/beta/threads/text_content_block.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .text import Text +from ...._models import BaseModel + +__all__ = ["TextContentBlock"] + + +class TextContentBlock(BaseModel): + text: Text + + type: Literal["text"] + """Always `text`.""" diff --git a/src/openai/types/beta/threads/text_content_block_param.py b/src/openai/types/beta/threads/text_content_block_param.py new file mode 100644 index 0000000000..6313de32cc --- /dev/null +++ b/src/openai/types/beta/threads/text_content_block_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TextContentBlockParam"] + + +class TextContentBlockParam(TypedDict, total=False): + text: Required[str] + """Text content to be sent to the model""" + + type: Required[Literal["text"]] + """Always `text`.""" diff --git a/src/openai/types/beta/threads/text_delta.py b/src/openai/types/beta/threads/text_delta.py new file mode 100644 index 0000000000..09cd357027 --- /dev/null +++ b/src/openai/types/beta/threads/text_delta.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ...._models import BaseModel +from .annotation_delta import AnnotationDelta + +__all__ = ["TextDelta"] + + +class TextDelta(BaseModel): + annotations: Optional[List[AnnotationDelta]] = None + + value: Optional[str] = None + """The data that makes up the text.""" diff --git a/src/openai/types/beta/threads/text_delta_block.py b/src/openai/types/beta/threads/text_delta_block.py new file mode 100644 index 0000000000..586116e0d6 --- /dev/null +++ b/src/openai/types/beta/threads/text_delta_block.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .text_delta import TextDelta + +__all__ = ["TextDeltaBlock"] + + +class TextDeltaBlock(BaseModel): + index: int + """The index of the content part in the message.""" + + type: Literal["text"] + """Always `text`.""" + + text: Optional[TextDelta] = None diff --git a/src/openai/types/beta/threads/thread_message.py b/src/openai/types/beta/threads/thread_message.py deleted file mode 100644 index 0f782ef845..0000000000 --- a/src/openai/types/beta/threads/thread_message.py +++ /dev/null @@ -1,65 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -import builtins -from typing import List, Union, Optional -from typing_extensions import Literal - -from ...._models import BaseModel -from .message_content_text import MessageContentText -from .message_content_image_file import MessageContentImageFile - -__all__ = ["ThreadMessage", "Content"] - -Content = Union[MessageContentImageFile, MessageContentText] - - -class ThreadMessage(BaseModel): - id: str - """The identifier, which can be referenced in API endpoints.""" - - assistant_id: Optional[str] - """ - If applicable, the ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) that - authored this message. - """ - - content: List[Content] - """The content of the message in array of text and/or images.""" - - created_at: int - """The Unix timestamp (in seconds) for when the message was created.""" - - file_ids: List[str] - """ - A list of [file](https://platform.openai.com/docs/api-reference/files) IDs that - the assistant should use. Useful for tools like retrieval and code_interpreter - that can access files. A maximum of 10 files can be attached to a message. - """ - - metadata: Optional[builtins.object] - """Set of 16 key-value pairs that can be attached to an object. - - This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. - """ - - object: Literal["thread.message"] - """The object type, which is always `thread.message`.""" - - role: Literal["user", "assistant"] - """The entity that produced the message. One of `user` or `assistant`.""" - - run_id: Optional[str] - """ - If applicable, the ID of the - [run](https://platform.openai.com/docs/api-reference/runs) associated with the - authoring of this message. - """ - - thread_id: str - """ - The [thread](https://platform.openai.com/docs/api-reference/threads) ID that - this message belongs to. - """ diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index 5fe182f41e..b4f43b298f 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -1,33 +1,38 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations from .chat_completion import ChatCompletion as ChatCompletion from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk +from .completion_list_params import CompletionListParams as CompletionListParams +from .parsed_chat_completion import ( + ParsedChoice as ParsedChoice, + ParsedChatCompletion as ParsedChatCompletion, + ParsedChatCompletionMessage as ParsedChatCompletionMessage, +) +from .chat_completion_deleted import ChatCompletionDeleted as ChatCompletionDeleted from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage +from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality from .completion_create_params import CompletionCreateParams as CompletionCreateParams -from .chat_completion_tool_param import ( - ChatCompletionToolParam as ChatCompletionToolParam, -) -from .chat_completion_message_param import ( - ChatCompletionMessageParam as ChatCompletionMessageParam, -) -from .chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall as ChatCompletionMessageToolCall, -) -from .chat_completion_content_part_param import ( - ChatCompletionContentPartParam as ChatCompletionContentPartParam, -) -from .chat_completion_tool_message_param import ( - ChatCompletionToolMessageParam as ChatCompletionToolMessageParam, -) -from .chat_completion_user_message_param import ( - ChatCompletionUserMessageParam as ChatCompletionUserMessageParam, -) -from .chat_completion_system_message_param import ( - ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam, +from .completion_update_params import CompletionUpdateParams as CompletionUpdateParams +from .parsed_function_tool_call import ( + ParsedFunction as ParsedFunction, + ParsedFunctionToolCall as ParsedFunctionToolCall, ) +from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam +from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam +from .chat_completion_store_message import ChatCompletionStoreMessage as ChatCompletionStoreMessage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort +from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall +from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam +from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam +from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam +from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam as ChatCompletionStreamOptionsParam +from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ( ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam, ) @@ -37,6 +42,9 @@ from .chat_completion_content_part_text_param import ( ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, ) 
+from .chat_completion_developer_message_param import ( + ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam, +) from .chat_completion_message_tool_call_param import ( ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, ) @@ -46,9 +54,18 @@ from .chat_completion_content_part_image_param import ( ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam, ) +from .chat_completion_prediction_content_param import ( + ChatCompletionPredictionContentParam as ChatCompletionPredictionContentParam, +) from .chat_completion_tool_choice_option_param import ( ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam, ) +from .chat_completion_content_part_refusal_param import ( + ChatCompletionContentPartRefusalParam as ChatCompletionContentPartRefusalParam, +) from .chat_completion_function_call_option_param import ( ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, ) +from .chat_completion_content_part_input_audio_param import ( + ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam, +) diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index da12ee7c07..3a235f89a5 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal @@ -6,8 +6,17 @@ from ..._models import BaseModel from ..completion_usage import CompletionUsage from .chat_completion_message import ChatCompletionMessage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob -__all__ = ["ChatCompletion", "Choice"] +__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"] + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" class Choice(BaseModel): @@ -24,6 +33,9 @@ class Choice(BaseModel): index: int """The index of the choice in the list of choices.""" + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + message: ChatCompletionMessage """A chat completion message generated by the model.""" @@ -47,6 +59,27 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). 
+ - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py index abdd87c991..35e3a3d784 100644 --- a/src/openai/types/chat/chat_completion_assistant_message_param.py +++ b/src/openai/types/chat/chat_completion_assistant_message_param.py @@ -1,13 +1,23 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam +from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam -__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"] +__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"] + + +class Audio(TypedDict, total=False): + id: Required[str] + """Unique identifier for a previous audio response from the model.""" + + +ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam] class FunctionCall(TypedDict, total=False): @@ -24,18 +34,37 @@ class FunctionCall(TypedDict, total=False): class ChatCompletionAssistantMessageParam(TypedDict, total=False): - content: Required[Optional[str]] - """The contents of the assistant message.""" - role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" - function_call: FunctionCall + audio: Optional[Audio] + """Data about a previous audio response from the model. + + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + content: Union[str, Iterable[ContentArrayOfContentPart], None] + """The contents of the assistant message. + + Required unless `tool_calls` or `function_call` is specified. + """ + + function_call: Optional[FunctionCall] """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model. """ - tool_calls: List[ChatCompletionMessageToolCallParam] + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ + + refusal: Optional[str] + """The refusal message by the assistant.""" + + tool_calls: Iterable[ChatCompletionMessageToolCallParam] """The tool calls generated by the model, such as function calls.""" diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py new file mode 100644 index 0000000000..232d60563d --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
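A short sketch of the widened assistant message shape above, with placeholder text and name: `content` may now be an array of text (or refusal) parts rather than a plain string, and `audio` can reference a previous audio response by ID.

from openai.types.chat.chat_completion_assistant_message_param import (
    ChatCompletionAssistantMessageParam,
)

assistant_turn: ChatCompletionAssistantMessageParam = {
    "role": "assistant",
    # Array-of-parts content instead of a plain string.
    "content": [{"type": "text", "text": "The summary is ready."}],
    # Optional participant name; purely illustrative.
    "name": "summarizer",
}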
+ +from ..._models import BaseModel + +__all__ = ["ChatCompletionAudio"] + + +class ChatCompletionAudio(BaseModel): + id: str + """Unique identifier for this audio response.""" + + data: str + """ + Base64 encoded audio bytes generated by the model, in the format specified in + the request. + """ + + expires_at: int + """ + The Unix timestamp (in seconds) for when this audio response will no longer be + accessible on the server for use in multi-turn conversations. + """ + + transcript: str + """Transcript of the audio generated by the model.""" diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py new file mode 100644 index 0000000000..25caada177 --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionAudioParam"] + + +class ChatCompletionAudioParam(TypedDict, total=False): + format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]] + """Specifies the output audio format. + + Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. + """ + + voice: Required[ + Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"] + ] + ] + """The voice the model uses to respond. + + Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`, + `onyx`, `sage`, and `shimmer`. + """ diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py index 6be046b01e..6fe996dd95 100644 --- a/src/openai/types/chat/chat_completion_chunk.py +++ b/src/openai/types/chat/chat_completion_chunk.py @@ -1,9 +1,11 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel +from ..completion_usage import CompletionUsage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob __all__ = [ "ChatCompletionChunk", @@ -12,6 +14,7 @@ "ChoiceDeltaFunctionCall", "ChoiceDeltaToolCall", "ChoiceDeltaToolCallFunction", + "ChoiceLogprobs", ] @@ -64,17 +67,28 @@ class ChoiceDelta(BaseModel): model. """ - role: Optional[Literal["system", "user", "assistant", "tool"]] = None + refusal: Optional[str] = None + """The refusal message generated by the model.""" + + role: Optional[Literal["developer", "system", "user", "assistant", "tool"]] = None """The role of the author of this message.""" tool_calls: Optional[List[ChoiceDeltaToolCall]] = None +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + + class Choice(BaseModel): delta: ChoiceDelta """A chat completion delta generated by streamed model responses.""" - finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter", "function_call"]] + finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter", "function_call"]] = None """The reason the model stopped generating tokens. 
This will be `stop` if the model hit a natural stop point or a provided stop @@ -87,6 +101,9 @@ class Choice(BaseModel): index: int """The index of the choice in the list of choices.""" + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + class ChatCompletionChunk(BaseModel): id: str @@ -95,7 +112,8 @@ class ChatCompletionChunk(BaseModel): choices: List[Choice] """A list of chat completion choices. - Can be more than one if `n` is greater than 1. + Can contain more than one elements if `n` is greater than 1. Can also be empty + for the last chunk if you set `stream_options: {"include_usage": true}`. """ created: int @@ -110,9 +128,41 @@ class ChatCompletionChunk(BaseModel): object: Literal["chat.completion.chunk"] """The object type, which is always `chat.completion.chunk`.""" + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + system_fingerprint: Optional[str] = None """ This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. """ + + usage: Optional[CompletionUsage] = None + """ + An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value **except for the last chunk** which contains the token + usage statistics for the entire request. + + **NOTE:** If the stream is interrupted or cancelled, you may not receive the + final usage chunk which contains the total token usage for the request. + """ diff --git a/src/openai/types/chat/chat_completion_content_part_image_param.py b/src/openai/types/chat/chat_completion_content_part_image_param.py index eb9bd52689..9d407324d0 100644 --- a/src/openai/types/chat/chat_completion_content_part_image_param.py +++ b/src/openai/types/chat/chat_completion_content_part_image_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -12,7 +12,11 @@ class ImageURL(TypedDict, total=False): """Either a URL of the image or the base64 encoded image data.""" detail: Literal["auto", "low", "high"] - """Specifies the detail level of the image.""" + """Specifies the detail level of the image. + + Learn more in the + [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). 
+ """ class ChatCompletionContentPartImageParam(TypedDict, total=False): diff --git a/src/openai/types/chat/chat_completion_content_part_input_audio_param.py b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py new file mode 100644 index 0000000000..0b1b1a80b1 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"] + + +class InputAudio(TypedDict, total=False): + data: Required[str] + """Base64 encoded audio data.""" + + format: Required[Literal["wav", "mp3"]] + """The format of the encoded audio data. Currently supports "wav" and "mp3".""" + + +class ChatCompletionContentPartInputAudioParam(TypedDict, total=False): + input_audio: Required[InputAudio] + + type: Required[Literal["input_audio"]] + """The type of the content part. Always `input_audio`.""" diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py index 587578e2ef..cbedc853ba 100644 --- a/src/openai/types/chat/chat_completion_content_part_param.py +++ b/src/openai/types/chat/chat_completion_content_part_param.py @@ -1,14 +1,41 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam -from .chat_completion_content_part_image_param import ( - ChatCompletionContentPartImageParam, -) +from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam +from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam + +__all__ = ["ChatCompletionContentPartParam", "File", "FileFile"] + + +class FileFile(TypedDict, total=False): + file_data: str + """ + The base64 encoded file data, used when passing the file to the model as a + string. + """ + + file_id: str + """The ID of an uploaded file to use as input.""" -__all__ = ["ChatCompletionContentPartParam"] + filename: str + """The name of the file, used when passing the file to the model as a string.""" -ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam] + +class File(TypedDict, total=False): + file: Required[FileFile] + + type: Required[Literal["file"]] + """The type of the content part. Always `file`.""" + + +ChatCompletionContentPartParam: TypeAlias = Union[ + ChatCompletionContentPartTextParam, + ChatCompletionContentPartImageParam, + ChatCompletionContentPartInputAudioParam, + File, +] diff --git a/src/openai/types/chat/chat_completion_content_part_refusal_param.py b/src/openai/types/chat/chat_completion_content_part_refusal_param.py new file mode 100644 index 0000000000..c18c7db770 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_refusal_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
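A sketch of a user turn built from the content part union above, mixing text, input audio, an image URL, and a file reference; the audio payload, URL, and file ID are placeholders.

import base64
from typing import List

from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam

# Placeholder audio bytes; a real call would read an actual WAV file.
audio_b64 = base64.b64encode(b"...wav bytes...").decode()

parts: List[ChatCompletionContentPartParam] = [
    {"type": "text", "text": "What is said in the clip, and what does the chart show?"},
    {"type": "input_audio", "input_audio": {"data": audio_b64, "format": "wav"}},
    {"type": "image_url", "image_url": {"url": "https://example.com/chart.png", "detail": "low"}},
    {"type": "file", "file": {"file_id": "file-abc123"}},
]
user_turn = {"role": "user", "content": parts}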
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartRefusalParam"] + + +class ChatCompletionContentPartRefusalParam(TypedDict, total=False): + refusal: Required[str] + """The refusal message generated by the model.""" + + type: Required[Literal["refusal"]] + """The type of the content part.""" diff --git a/src/openai/types/chat/chat_completion_content_part_text_param.py b/src/openai/types/chat/chat_completion_content_part_text_param.py index 38edcf054e..a270744417 100644 --- a/src/openai/types/chat/chat_completion_content_part_text_param.py +++ b/src/openai/types/chat/chat_completion_content_part_text_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_deleted.py b/src/openai/types/chat/chat_completion_deleted.py new file mode 100644 index 0000000000..0a541cb23d --- /dev/null +++ b/src/openai/types/chat/chat_completion_deleted.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionDeleted"] + + +class ChatCompletionDeleted(BaseModel): + id: str + """The ID of the chat completion that was deleted.""" + + deleted: bool + """Whether the chat completion was deleted.""" + + object: Literal["chat.completion.deleted"] + """The type of object being deleted.""" diff --git a/src/openai/types/chat/chat_completion_developer_message_param.py b/src/openai/types/chat/chat_completion_developer_message_param.py new file mode 100644 index 0000000000..01e4fdb654 --- /dev/null +++ b/src/openai/types/chat/chat_completion_developer_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionDeveloperMessageParam"] + + +class ChatCompletionDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/openai/types/chat/chat_completion_function_call_option_param.py b/src/openai/types/chat/chat_completion_function_call_option_param.py index 72d41d908c..2bc014af7a 100644 --- a/src/openai/types/chat/chat_completion_function_call_option_param.py +++ b/src/openai/types/chat/chat_completion_function_call_option_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_function_message_param.py b/src/openai/types/chat/chat_completion_function_message_param.py index 1a16c5f5eb..5af12bf94f 100644 --- a/src/openai/types/chat/chat_completion_function_message_param.py +++ b/src/openai/types/chat/chat_completion_function_message_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -10,7 +10,7 @@ class ChatCompletionFunctionMessageParam(TypedDict, total=False): content: Required[Optional[str]] - """The return value from the function call, to return to the model.""" + """The contents of the function message.""" name: Required[str] """The name of the function to call.""" diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py index 4749798a33..c659ac3da0 100644 --- a/src/openai/types/chat/chat_completion_message.py +++ b/src/openai/types/chat/chat_completion_message.py @@ -1,12 +1,35 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel +from .chat_completion_audio import ChatCompletionAudio from .chat_completion_message_tool_call import ChatCompletionMessageToolCall -__all__ = ["ChatCompletionMessage", "FunctionCall"] +__all__ = ["ChatCompletionMessage", "Annotation", "AnnotationURLCitation", "FunctionCall"] + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + url: str + """The URL of the web resource.""" + + +class Annotation(BaseModel): + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url_citation: AnnotationURLCitation + """A URL citation when using web search.""" class FunctionCall(BaseModel): @@ -23,12 +46,28 @@ class FunctionCall(BaseModel): class ChatCompletionMessage(BaseModel): - content: Optional[str] + content: Optional[str] = None """The contents of the message.""" + refusal: Optional[str] = None + """The refusal message generated by the model.""" + role: Literal["assistant"] """The role of the author of this message.""" + annotations: Optional[List[Annotation]] = None + """ + Annotations for the message, when applicable, as when using the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + """ + + audio: Optional[ChatCompletionAudio] = None + """ + If the audio output modality is requested, this object contains data about the + audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + function_call: Optional[FunctionCall] = None """Deprecated and replaced by `tool_calls`. diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index 7ec3d6a7b7..942da24304 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -1,18 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .chat_completion_tool_message_param import ChatCompletionToolMessageParam from .chat_completion_user_message_param import ChatCompletionUserMessageParam from .chat_completion_system_message_param import ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam +from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam __all__ = ["ChatCompletionMessageParam"] -ChatCompletionMessageParam = Union[ +ChatCompletionMessageParam: TypeAlias = Union[ + ChatCompletionDeveloperMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam, diff --git a/src/openai/types/chat/chat_completion_message_tool_call.py b/src/openai/types/chat/chat_completion_message_tool_call.py index 63c72fcdca..4fec667096 100644 --- a/src/openai/types/chat/chat_completion_message_tool_call.py +++ b/src/openai/types/chat/chat_completion_message_tool_call.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/chat/chat_completion_message_tool_call_param.py b/src/openai/types/chat/chat_completion_message_tool_call_param.py index a700f02c4f..f616c363d0 100644 --- a/src/openai/types/chat/chat_completion_message_tool_call_param.py +++ b/src/openai/types/chat/chat_completion_message_tool_call_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/chat/chat_completion_modality.py b/src/openai/types/chat/chat_completion_modality.py new file mode 100644 index 0000000000..8e3c145979 --- /dev/null +++ b/src/openai/types/chat/chat_completion_modality.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionModality"] + +ChatCompletionModality: TypeAlias = Literal["text", "audio"] diff --git a/src/openai/types/chat/chat_completion_named_tool_choice_param.py b/src/openai/types/chat/chat_completion_named_tool_choice_param.py index 4c6f20d2f1..369f8b42dd 100644 --- a/src/openai/types/chat/chat_completion_named_tool_choice_param.py +++ b/src/openai/types/chat/chat_completion_named_tool_choice_param.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -13,7 +13,7 @@ class Function(TypedDict, total=False): class ChatCompletionNamedToolChoiceParam(TypedDict, total=False): - function: Function + function: Required[Function] - type: Literal["function"] + type: Required[Literal["function"]] """The type of the tool. 
Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/chat_completion_prediction_content_param.py b/src/openai/types/chat/chat_completion_prediction_content_param.py new file mode 100644 index 0000000000..c44e6e3653 --- /dev/null +++ b/src/openai/types/chat/chat_completion_prediction_content_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionPredictionContentParam"] + + +class ChatCompletionPredictionContentParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """ + The content that should be matched when generating a model response. If + generated tokens would match this content, the entire model response can be + returned much more quickly. + """ + + type: Required[Literal["content"]] + """The type of the predicted content you want to provide. + + This type is currently always `content`. + """ diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py new file mode 100644 index 0000000000..42a980c5b8 --- /dev/null +++ b/src/openai/types/chat/chat_completion_reasoning_effort.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["ChatCompletionReasoningEffort"] + +ChatCompletionReasoningEffort = ReasoningEffort diff --git a/src/openai/types/chat/chat_completion_role.py b/src/openai/types/chat/chat_completion_role.py index 9fa2acb4bb..3ec5e9ad87 100644 --- a/src/openai/types/chat/chat_completion_role.py +++ b/src/openai/types/chat/chat_completion_role.py @@ -1,7 +1,7 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias __all__ = ["ChatCompletionRole"] -ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"] +ChatCompletionRole: TypeAlias = Literal["developer", "system", "user", "assistant", "tool", "function"] diff --git a/src/openai/types/chat/chat_completion_store_message.py b/src/openai/types/chat/chat_completion_store_message.py new file mode 100644 index 0000000000..8dc093f7b8 --- /dev/null +++ b/src/openai/types/chat/chat_completion_store_message.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat_completion_message import ChatCompletionMessage + +__all__ = ["ChatCompletionStoreMessage"] + + +class ChatCompletionStoreMessage(ChatCompletionMessage): + id: str + """The identifier of the chat message.""" diff --git a/src/openai/types/chat/chat_completion_stream_options_param.py b/src/openai/types/chat/chat_completion_stream_options_param.py new file mode 100644 index 0000000000..471e0eba98 --- /dev/null +++ b/src/openai/types/chat/chat_completion_stream_options_param.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
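A brief sketch pairing the prediction content and named tool choice param shapes above; the draft text and function name are placeholders chosen for illustration.

from openai.types.chat.chat_completion_named_tool_choice_param import (
    ChatCompletionNamedToolChoiceParam,
)
from openai.types.chat.chat_completion_prediction_content_param import (
    ChatCompletionPredictionContentParam,
)

# A draft of the expected output; tokens that match it can be returned faster.
prediction: ChatCompletionPredictionContentParam = {
    "type": "content",
    "content": "def add(a, b):\n    return a + b\n",
}

# Force the model to call one specific function (both keys are now required).
tool_choice: ChatCompletionNamedToolChoiceParam = {
    "type": "function",
    "function": {"name": "get_weather"},
}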
+ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ChatCompletionStreamOptionsParam"] + + +class ChatCompletionStreamOptionsParam(TypedDict, total=False): + include_usage: bool + """If set, an additional chunk will be streamed before the `data: [DONE]` message. + + The `usage` field on this chunk shows the token usage statistics for the entire + request, and the `choices` field will always be an empty array. + + All other chunks will also include a `usage` field, but with a null value. + **NOTE:** If the stream is interrupted, you may not receive the final usage + chunk which contains the total token usage for the request. + """ diff --git a/src/openai/types/chat/chat_completion_system_message_param.py b/src/openai/types/chat/chat_completion_system_message_param.py index ec08e00350..172ccea09e 100644 --- a/src/openai/types/chat/chat_completion_system_message_param.py +++ b/src/openai/types/chat/chat_completion_system_message_param.py @@ -1,16 +1,25 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Optional +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionSystemMessageParam"] class ChatCompletionSystemMessageParam(TypedDict, total=False): - content: Required[Optional[str]] + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the system message.""" role: Required[Literal["system"]] """The role of the messages author, in this case `system`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/openai/types/chat/chat_completion_token_logprob.py b/src/openai/types/chat/chat_completion_token_logprob.py new file mode 100644 index 0000000000..c69e258910 --- /dev/null +++ b/src/openai/types/chat/chat_completion_token_logprob.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"] + + +class TopLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. 
+ """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. + """ diff --git a/src/openai/types/chat/chat_completion_tool_choice_option_param.py b/src/openai/types/chat/chat_completion_tool_choice_option_param.py index 8104b26acb..7dedf041b7 100644 --- a/src/openai/types/chat/chat_completion_tool_choice_option_param.py +++ b/src/openai/types/chat/chat_completion_tool_choice_option_param.py @@ -1,12 +1,14 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam __all__ = ["ChatCompletionToolChoiceOptionParam"] -ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto"], ChatCompletionNamedToolChoiceParam] +ChatCompletionToolChoiceOptionParam: TypeAlias = Union[ + Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam +] diff --git a/src/openai/types/chat/chat_completion_tool_message_param.py b/src/openai/types/chat/chat_completion_tool_message_param.py index 51759a9a99..eb5e270e47 100644 --- a/src/openai/types/chat/chat_completion_tool_message_param.py +++ b/src/openai/types/chat/chat_completion_tool_message_param.py @@ -1,15 +1,17 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Optional +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionToolMessageParam"] class ChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[Optional[str]] + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/openai/types/chat/chat_completion_tool_param.py b/src/openai/types/chat/chat_completion_tool_param.py index 54c223955e..6c2b1a36f0 100644 --- a/src/openai/types/chat/chat_completion_tool_param.py +++ b/src/openai/types/chat/chat_completion_tool_param.py @@ -1,16 +1,16 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from ..shared_params.function_definition import FunctionDefinition __all__ = ["ChatCompletionToolParam"] class ChatCompletionToolParam(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] + function: Required[FunctionDefinition] type: Required[Literal["function"]] """The type of the tool. 
Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/chat_completion_user_message_param.py b/src/openai/types/chat/chat_completion_user_message_param.py index 6f0cf34623..5c15322a22 100644 --- a/src/openai/types/chat/chat_completion_user_message_param.py +++ b/src/openai/types/chat/chat_completion_user_message_param.py @@ -1,8 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict from .chat_completion_content_part_param import ChatCompletionContentPartParam @@ -11,8 +11,15 @@ class ChatCompletionUserMessageParam(TypedDict, total=False): - content: Required[Union[str, List[ChatCompletionContentPartParam], None]] + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] """The contents of the user message.""" role: Required[Literal["user"]] """The role of the messages author, in this case `user`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 69fe250eca..60d5f53cdd 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -1,63 +1,63 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ...types import shared_params +from ..shared.chat_model import ChatModel +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .chat_completion_tool_param import ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam -from .chat_completion_tool_choice_option_param import ( - ChatCompletionToolChoiceOptionParam, -) -from .chat_completion_function_call_option_param import ( - ChatCompletionFunctionCallOptionParam, -) +from ..shared_params.function_parameters import FunctionParameters +from ..shared_params.response_format_text import ResponseFormatText +from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from .chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema +from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam __all__ = [ "CompletionCreateParamsBase", "FunctionCall", "Function", "ResponseFormat", + "WebSearchOptions", + "WebSearchOptionsUserLocation", + "WebSearchOptionsUserLocationApproximate", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming", ] class CompletionCreateParamsBase(TypedDict, total=False): - messages: 
Required[List[ChatCompletionMessageParam]] + messages: Required[Iterable[ChatCompletionMessageParam]] """A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - """ - - model: Required[ - Union[ - str, - Literal[ - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - ], - ] - ] - """ID of the model to use. + Depending on the [model](https://platform.openai.com/docs/models) you use, + different message types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + """ - See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. + model: Required[Union[str, ChatModel]] + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + audio: Optional[ChatCompletionAudioParam] + """Parameters for audio output. + + Required when audio output is requested with `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). """ frequency_penalty: Optional[float] @@ -65,24 +65,26 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) """ function_call: FunctionCall """Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. - `none` is the default when no functions are present. `auto`` is the default if + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if functions are present. """ - functions: List[Function] + functions: Iterable[Function] """Deprecated in favor of `tools`. A list of functions the model may generate JSON inputs for. @@ -99,81 +101,185 @@ class CompletionCreateParamsBase(TypedDict, total=False): or exclusive selection of the relevant token. """ + logprobs: Optional[bool] + """Whether to return log probabilities of the output tokens or not. + + If true, returns the log probabilities of each output token returned in the + `content` of `message`. 
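# A minimal sketch of the `logprobs` flag documented above, assuming a
# gpt-4o-mini model; the values come back as the ChatCompletionTokenLogprob
# objects defined earlier in this diff, and `top_logprobs` is the companion
# field defined later in this params class.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Pick a color."}],
    logprobs=True,
    top_logprobs=2,
)
choice_logprobs = completion.choices[0].logprobs
if choice_logprobs and choice_logprobs.content:
    for token_logprob in choice_logprobs.content:
        print(token_logprob.token, token_logprob.logprob)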
+ """ + + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] - """The maximum number of [tokens](/tokenizer) to generate in the chat completion. + """ + The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] + """ + Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` """ n: Optional[int] - """How many chat completion choices to generate for each input message.""" + """How many chat completion choices to generate for each input message. + + Note that you will be charged based on the number of generated tokens across all + of the choices. Keep `n` as `1` to minimize costs. + """ + + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + prediction: Optional[ChatCompletionPredictionContentParam] + """ + Static predicted output content, such as the content of a text file that is + being regenerated. + """ presence_penalty: Optional[float] """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + """ + + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. """ response_format: ResponseFormat """An object specifying the format that the model must output. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. 
+ Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in increased latency and appearance of a "stuck" request. Also - note that the message content may be partially cut off if - `finish_reason="length"`, which indicates the generation exceeded `max_tokens` - or the conversation exceeded the max context length. + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. """ seed: Optional[int] - """This feature is in Beta. + """ + This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + """ - If specified, our system will make a best effort to sample deterministically, - such that repeated requests with the same `seed` and parameters should return - the same result. Determinism is not guaranteed, and you should refer to the - `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Optional[Literal["auto", "default", "flex"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. """ - stop: Union[Optional[str], List[str]] - """Up to 4 sequences where the API will stop generating further tokens.""" + stop: Union[Optional[str], List[str], None] + """Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + """ + + store: Optional[bool] + """ + Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + """ + + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" temperature: Optional[float] """What sampling temperature to use, between 0 and 2. 
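# A minimal sketch of the `json_schema` response format (Structured Outputs)
# described above, assuming a gpt-4o-mini model; the schema itself is a
# made-up example.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Alice is 30 years old."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
                "required": ["name", "age"],
                "additionalProperties": False,
            },
        },
    },
)
print(completion.choices[0].message.content)  # a JSON string matching the schema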
Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. """ tool_choice: ChatCompletionToolChoiceOptionParam """ - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via - `{"type: "function", "function": {"name": "my_function"}}` forces the model to - call that function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. + Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. """ - tools: List[ChatCompletionToolParam] + tools: Iterable[ChatCompletionToolParam] """A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of - functions the model may generate JSON inputs for. + functions the model may generate JSON inputs for. A max of 128 functions are + supported. + """ + + top_logprobs: Optional[int] + """ + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. """ top_p: Optional[float] @@ -189,11 +295,18 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + web_search_options: WebSearchOptions + """ + This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). """ -FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] +FunctionCall: TypeAlias = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] class Function(TypedDict, total=False): @@ -204,51 +317,91 @@ class Function(TypedDict, total=False): of 64. """ - parameters: Required[shared_params.FunctionParameters] + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. - See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. 
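# A minimal sketch of the updated `tools` / `tool_choice` parameters described
# above, including the new "required" option; the weather function and model
# are made-up examples.
from openai import OpenAI

client = OpenAI()

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="required",  # or {"type": "function", "function": {"name": "get_weather"}}
)
print(completion.choices[0].message.tool_calls)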
- To describe a function that accepts no parameters, provide the value - `{"type": "object", "properties": {}}`. + Omitting `parameters` defines a function with an empty parameter list. """ - description: str + +ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject] + + +class WebSearchOptionsUserLocationApproximate(TypedDict, total=False): + city: str + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: str """ - A description of what the function does, used by the model to choose when and - how to call the function. + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. """ + region: str + """Free text input for the region of the user, e.g. `California`.""" -class ResponseFormat(TypedDict, total=False): - type: Literal["text", "json_object"] - """Must be one of `text` or `json_object`.""" + timezone: str + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): - stream: Optional[Literal[False]] - """If set, partial message deltas will be sent, like in ChatGPT. +class WebSearchOptionsUserLocation(TypedDict, total=False): + approximate: Required[WebSearchOptionsUserLocationApproximate] + """Approximate location parameters for the search.""" + + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + +class WebSearchOptions(TypedDict, total=False): + search_context_size: Literal["low", "medium", "high"] + """ + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[WebSearchOptionsUserLocation] + """Approximate location parameters for the search.""" + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, partial message deltas will be sent, like in ChatGPT. - - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). 
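# A minimal sketch of the WebSearchOptions shape defined above; web search
# needs a search-capable model, and the model name used here is an assumption.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-search-preview",  # assumed search-enabled model
    messages=[{"role": "user", "content": "What happened in the news today?"}],
    web_search_options={
        "search_context_size": "low",
        "user_location": {
            "type": "approximate",
            "approximate": {"city": "San Francisco", "country": "US"},
        },
    },
)
print(completion.choices[0].message.content)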
+ """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. """ diff --git a/src/openai/types/chat/completion_list_params.py b/src/openai/types/chat/completion_list_params.py new file mode 100644 index 0000000000..d93da834a3 --- /dev/null +++ b/src/openai/types/chat/completion_list_params.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionListParams"] + + +class CompletionListParams(TypedDict, total=False): + after: str + """Identifier for the last chat completion from the previous pagination request.""" + + limit: int + """Number of Chat Completions to retrieve.""" + + metadata: Optional[Metadata] + """A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + """ + + model: str + """The model used to generate the Chat Completions.""" + + order: Literal["asc", "desc"] + """Sort order for Chat Completions by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ diff --git a/src/openai/types/chat/completion_update_params.py b/src/openai/types/chat/completion_update_params.py new file mode 100644 index 0000000000..fc71733f07 --- /dev/null +++ b/src/openai/types/chat/completion_update_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionUpdateParams"] + + +class CompletionUpdateParams(TypedDict, total=False): + metadata: Required[Optional[Metadata]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/chat/completions/__init__.py b/src/openai/types/chat/completions/__init__.py new file mode 100644 index 0000000000..b8e62d6a64 --- /dev/null +++ b/src/openai/types/chat/completions/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .message_list_params import MessageListParams as MessageListParams diff --git a/src/openai/types/chat/completions/message_list_params.py b/src/openai/types/chat/completions/message_list_params.py new file mode 100644 index 0000000000..4e694e83ea --- /dev/null +++ b/src/openai/types/chat/completions/message_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["MessageListParams"] + + +class MessageListParams(TypedDict, total=False): + after: str + """Identifier for the last message from the previous pagination request.""" + + limit: int + """Number of messages to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for messages by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ diff --git a/src/openai/types/chat/parsed_chat_completion.py b/src/openai/types/chat/parsed_chat_completion.py new file mode 100644 index 0000000000..4b11dac5a0 --- /dev/null +++ b/src/openai/types/chat/parsed_chat_completion.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Generic, TypeVar, Optional + +from ..._models import GenericModel +from .chat_completion import Choice, ChatCompletion +from .chat_completion_message import ChatCompletionMessage +from .parsed_function_tool_call import ParsedFunctionToolCall + +__all__ = ["ParsedChatCompletion", "ParsedChoice"] + + +ContentType = TypeVar("ContentType") + + +# we need to disable this check because we're overriding properties +# with subclasses of their types which is technically unsound as +# properties can be mutated. +# pyright: reportIncompatibleVariableOverride=false + + +class ParsedChatCompletionMessage(ChatCompletionMessage, GenericModel, Generic[ContentType]): + parsed: Optional[ContentType] = None + """The auto-parsed message contents""" + + tool_calls: Optional[List[ParsedFunctionToolCall]] = None # type: ignore[assignment] + """The tool calls generated by the model, such as function calls.""" + + +class ParsedChoice(Choice, GenericModel, Generic[ContentType]): + message: ParsedChatCompletionMessage[ContentType] + """A chat completion message generated by the model.""" + + +class ParsedChatCompletion(ChatCompletion, GenericModel, Generic[ContentType]): + choices: List[ParsedChoice[ContentType]] # type: ignore[assignment] + """A list of chat completion choices. + + Can be more than one if `n` is greater than 1. + """ diff --git a/src/openai/types/chat/parsed_function_tool_call.py b/src/openai/types/chat/parsed_function_tool_call.py new file mode 100644 index 0000000000..3e90789f85 --- /dev/null +++ b/src/openai/types/chat/parsed_function_tool_call.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .chat_completion_message_tool_call import Function, ChatCompletionMessageToolCall + +__all__ = ["ParsedFunctionToolCall", "ParsedFunction"] + +# we need to disable this check because we're overriding properties +# with subclasses of their types which is technically unsound as +# properties can be mutated. +# pyright: reportIncompatibleVariableOverride=false + + +class ParsedFunction(Function): + parsed_arguments: Optional[object] = None + """ + The arguments to call the function with. + + If you used `openai.pydantic_function_tool()` then this will be an + instance of the given `BaseModel`. + + Otherwise, this will be the parsed JSON arguments. 
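# A minimal sketch of where the ParsedChatCompletion / ParsedFunctionToolCall
# types above surface: `client.beta.chat.completions.parse` with a Pydantic
# model as the response format; model name and schema are assumptions.
from pydantic import BaseModel

from openai import OpenAI


class Person(BaseModel):
    name: str
    age: int


client = OpenAI()

completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Alice is 30 years old."}],
    response_format=Person,
)
print(completion.choices[0].message.parsed)  # a Person instance, or None if the model refused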
+ """ + + +class ParsedFunctionToolCall(ChatCompletionMessageToolCall): + function: ParsedFunction + """The function that the model called.""" diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py new file mode 100644 index 0000000000..f3b0e310cc --- /dev/null +++ b/src/openai/types/chat_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .shared import chat_model + +__all__ = ["ChatModel"] + +ChatModel = chat_model.ChatModel diff --git a/src/openai/types/completion.py b/src/openai/types/completion.py index cd80498b16..d3b3102a4a 100644 --- a/src/openai/types/completion.py +++ b/src/openai/types/completion.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional from typing_extensions import Literal diff --git a/src/openai/types/completion_choice.py b/src/openai/types/completion_choice.py index 71de0f9247..d948ebc942 100644 --- a/src/openai/types/completion_choice.py +++ b/src/openai/types/completion_choice.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict, List, Optional from typing_extensions import Literal @@ -30,6 +30,6 @@ class CompletionChoice(BaseModel): index: int - logprobs: Optional[Logprobs] + logprobs: Optional[Logprobs] = None text: str diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index 3e56d4f7bf..6ae20cff83 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -1,41 +1,27 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Dict, List, Union, Optional +from typing import Dict, List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict +from .chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam + __all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"] class CompletionCreateParamsBase(TypedDict, total=False): - model: Required[ - Union[ - str, - Literal[ - "babbage-002", - "davinci-002", - "gpt-3.5-turbo-instruct", - "text-davinci-003", - "text-davinci-002", - "text-davinci-001", - "code-davinci-002", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - ] - ] + model: Required[Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ - prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] + prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]] """ The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. 
@@ -67,7 +53,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ logit_bias: Optional[Dict[str, int]] @@ -75,12 +61,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this - [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to - convert text to token IDs. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, - but values between -1 and 1 should decrease or increase likelihood of selection; - values like -100 or 100 should result in a ban or exclusive selection of the - relevant token. + [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. + Mathematically, the bias is added to the logits generated by the model prior to + sampling. The exact effect will vary per model, but values between -1 and 1 + should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. @@ -88,16 +73,18 @@ class CompletionCreateParamsBase(TypedDict, total=False): logprobs: Optional[int] """ - Include the log probabilities on the `logprobs` most likely tokens, as well the - chosen tokens. For example, if `logprobs` is 5, the API will return a list of - the 5 most likely tokens. The API will always return the `logprob` of the - sampled token, so there may be up to `logprobs+1` elements in the response. + Include the log probabilities on the `logprobs` most likely output tokens, as + well the chosen tokens. For example, if `logprobs` is 5, the API will return a + list of the 5 most likely tokens. The API will always return the `logprob` of + the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. """ max_tokens: Optional[int] - """The maximum number of [tokens](/tokenizer) to generate in the completion. + """ + The maximum number of [tokens](/tokenizer) that can be generated in the + completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. @@ -119,7 +106,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ seed: Optional[int] @@ -133,13 +120,20 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + """Not supported with latest reasoning models `o3` and `o4-mini`. 
- The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ + stream_options: Optional[ChatCompletionStreamOptionsParam] + """Options for streaming response. Only set this when you set `stream: true`.""" + suffix: Optional[str] - """The suffix that comes after a completion of inserted text.""" + """The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + """ temperature: Optional[float] """What sampling temperature to use, between 0 and 2. @@ -163,11 +157,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """Whether to stream back partial progress. diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index b825d5529f..d8c4e84cf7 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,8 +1,40 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails", "PromptTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + accepted_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that + appeared in the completion. + """ + + audio_tokens: Optional[int] = None + """Audio input tokens generated by the model.""" + + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" + + rejected_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that did + not appear in the completion. However, like reasoning tokens, these tokens are + still counted in the total completion tokens for purposes of billing, output, + and context window limits. + """ + + +class PromptTokensDetails(BaseModel): + audio_tokens: Optional[int] = None + """Audio input tokens present in the prompt.""" + + cached_tokens: Optional[int] = None + """Cached tokens present in the prompt.""" class CompletionUsage(BaseModel): @@ -14,3 +46,9 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" + + prompt_tokens_details: Optional[PromptTokensDetails] = None + """Breakdown of tokens used in the prompt.""" diff --git a/src/openai/types/create_embedding_response.py b/src/openai/types/create_embedding_response.py index bf64037e16..eff247a112 100644 --- a/src/openai/types/create_embedding_response.py +++ b/src/openai/types/create_embedding_response.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
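# A minimal sketch of reading the token-usage breakdowns added to
# CompletionUsage above; the detail objects are optional, so guard for None on
# models that do not report them, and the model name is an assumption.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="o4-mini",
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
)
usage = completion.usage
if usage is not None:
    print("completion tokens:", usage.completion_tokens)
    if usage.completion_tokens_details:
        print("reasoning tokens:", usage.completion_tokens_details.reasoning_tokens)
    if usage.prompt_tokens_details:
        print("cached prompt tokens:", usage.prompt_tokens_details.cached_tokens)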
from typing import List from typing_extensions import Literal diff --git a/src/openai/types/edit.py b/src/openai/types/edit.py deleted file mode 100644 index 48bca2987b..0000000000 --- a/src/openai/types/edit.py +++ /dev/null @@ -1,40 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing import List -from typing_extensions import Literal - -from .._models import BaseModel -from .completion_usage import CompletionUsage - -__all__ = ["Edit", "Choice"] - - -class Choice(BaseModel): - finish_reason: Literal["stop", "length"] - """The reason the model stopped generating tokens. - - This will be `stop` if the model hit a natural stop point or a provided stop - sequence, `length` if the maximum number of tokens specified in the request was - reached, or `content_filter` if content was omitted due to a flag from our - content filters. - """ - - index: int - """The index of the choice in the list of choices.""" - - text: str - """The edited result.""" - - -class Edit(BaseModel): - choices: List[Choice] - """A list of edit choices. Can be more than one if `n` is greater than 1.""" - - created: int - """The Unix timestamp (in seconds) of when the edit was created.""" - - object: Literal["edit"] - """The object type, which is always `edit`.""" - - usage: CompletionUsage - """Usage statistics for the completion request.""" diff --git a/src/openai/types/edit_create_params.py b/src/openai/types/edit_create_params.py deleted file mode 100644 index a23b79c369..0000000000 --- a/src/openai/types/edit_create_params.py +++ /dev/null @@ -1,44 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import Union, Optional -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["EditCreateParams"] - - -class EditCreateParams(TypedDict, total=False): - instruction: Required[str] - """The instruction that tells the model how to edit the prompt.""" - - model: Required[Union[str, Literal["text-davinci-edit-001", "code-davinci-edit-001"]]] - """ID of the model to use. - - You can use the `text-davinci-edit-001` or `code-davinci-edit-001` model with - this endpoint. - """ - - input: Optional[str] - """The input text to use as a starting point for the edit.""" - - n: Optional[int] - """How many edits to generate for the input and instruction.""" - - temperature: Optional[float] - """What sampling temperature to use, between 0 and 2. - - Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - """ - - top_p: Optional[float] - """ - An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - """ diff --git a/src/openai/types/embedding.py b/src/openai/types/embedding.py index 9c53704d5d..769b1d165f 100644 --- a/src/openai/types/embedding.py +++ b/src/openai/types/embedding.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List from typing_extensions import Literal diff --git a/src/openai/types/embedding_create_params.py b/src/openai/types/embedding_create_params.py index bc8535f880..a90566449b 100644 --- a/src/openai/types/embedding_create_params.py +++ b/src/openai/types/embedding_create_params.py @@ -1,32 +1,42 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union +from typing import List, Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .embedding_model import EmbeddingModel + __all__ = ["EmbeddingCreateParams"] class EmbeddingCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str], List[int], List[List[int]]]] + input: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] """Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model - (8192 tokens for `text-embedding-ada-002`) and cannot be an empty string. + (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any + array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. """ - model: Required[Union[str, Literal["text-embedding-ada-002"]]] + model: Required[Union[str, EmbeddingModel]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. + """ + + dimensions: int + """The number of dimensions the resulting output embeddings should have. + + Only supported in `text-embedding-3` and later models. """ encoding_format: Literal["float", "base64"] @@ -39,5 +49,5 @@ class EmbeddingCreateParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/embedding_model.py b/src/openai/types/embedding_model.py new file mode 100644 index 0000000000..075ff97644 --- /dev/null +++ b/src/openai/types/embedding_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["EmbeddingModel"] + +EmbeddingModel: TypeAlias = Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] diff --git a/src/openai/types/eval_create_params.py b/src/openai/types/eval_create_params.py new file mode 100644 index 0000000000..3b712580a0 --- /dev/null +++ b/src/openai/types/eval_create_params.py @@ -0,0 +1,163 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
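# A minimal sketch of the new `dimensions` parameter and the EmbeddingModel
# literal defined above; the input strings are made-up examples.
from openai import OpenAI

client = OpenAI()

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=["first document", "second document"],
    dimensions=256,  # only supported on text-embedding-3 and later models
)
print(len(response.data[0].embedding))  # 256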
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .shared_params.metadata import Metadata +from .graders.python_grader_param import PythonGraderParam +from .graders.score_model_grader_param import ScoreModelGraderParam +from .graders.string_check_grader_param import StringCheckGraderParam +from .responses.response_input_text_param import ResponseInputTextParam +from .graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = [ + "EvalCreateParams", + "DataSourceConfig", + "DataSourceConfigCustom", + "DataSourceConfigStoredCompletions", + "TestingCriterion", + "TestingCriterionLabelModel", + "TestingCriterionLabelModelInput", + "TestingCriterionLabelModelInputSimpleInputMessage", + "TestingCriterionLabelModelInputEvalItem", + "TestingCriterionLabelModelInputEvalItemContent", + "TestingCriterionLabelModelInputEvalItemContentOutputText", + "TestingCriterionTextSimilarity", + "TestingCriterionPython", + "TestingCriterionScoreModel", +] + + +class EvalCreateParams(TypedDict, total=False): + data_source_config: Required[DataSourceConfig] + """The configuration for the data source used for the evaluation runs.""" + + testing_criteria: Required[Iterable[TestingCriterion]] + """A list of graders for all eval runs in this group.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + +class DataSourceConfigCustom(TypedDict, total=False): + item_schema: Required[Dict[str, object]] + """The json schema for each row in the data source.""" + + type: Required[Literal["custom"]] + """The type of data source. Always `custom`.""" + + include_sample_schema: bool + """ + Whether the eval should expect you to populate the sample namespace (ie, by + generating responses off of your data source) + """ + + +class DataSourceConfigStoredCompletions(TypedDict, total=False): + type: Required[Literal["stored_completions"]] + """The type of data source. Always `stored_completions`.""" + + metadata: Dict[str, object] + """Metadata filters for the stored completions data source.""" + + +DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigStoredCompletions] + + +class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[str] + """The role of the message (e.g. "system", "assistant", "user").""" + + +class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False): + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. 
Always `output_text`.""" + + +TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[ + str, ResponseInputTextParam, TestingCriterionLabelModelInputEvalItemContentOutputText +] + + +class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False): + content: Required[TestingCriterionLabelModelInputEvalItemContent] + """Text inputs to the model - can contain template strings.""" + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +TestingCriterionLabelModelInput: TypeAlias = Union[ + TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem +] + + +class TestingCriterionLabelModel(TypedDict, total=False): + input: Required[Iterable[TestingCriterionLabelModelInput]] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + labels: Required[List[str]] + """The labels to classify to each item in the evaluation.""" + + model: Required[str] + """The model to use for the evaluation. Must support structured outputs.""" + + name: Required[str] + """The name of the grader.""" + + passing_labels: Required[List[str]] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Required[Literal["label_model"]] + """The object type, which is always `label_model`.""" + + +class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False): + pass_threshold: Required[float] + """The threshold for the score.""" + + +class TestingCriterionPython(PythonGraderParam, total=False): + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionScoreModel(ScoreModelGraderParam, total=False): + pass_threshold: float + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + TestingCriterionLabelModel, + StringCheckGraderParam, + TestingCriterionTextSimilarity, + TestingCriterionPython, + TestingCriterionScoreModel, +] diff --git a/src/openai/types/eval_create_response.py b/src/openai/types/eval_create_response.py new file mode 100644 index 0000000000..d5f158ad29 --- /dev/null +++ b/src/openai/types/eval_create_response.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
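# A minimal sketch of the EvalCreateParams shapes above: a custom data source
# plus a label_model grader. The item schema, labels, and model choice are
# made up, and it assumes the SDK exposes these endpoints as client.evals.*.
from openai import OpenAI

client = OpenAI()

evaluation = client.evals.create(
    name="tone-check",
    data_source_config={
        "type": "custom",
        "item_schema": {
            "type": "object",
            "properties": {"ticket": {"type": "string"}},
            "required": ["ticket"],
        },
    },
    testing_criteria=[
        {
            "type": "label_model",
            "name": "politeness",
            "model": "gpt-4o-mini",
            "input": [
                {"role": "system", "content": "Classify the tone of the customer ticket as polite or impolite."},
                {"role": "user", "content": "{{item.ticket}}"},
            ],
            "labels": ["polite", "impolite"],
            "passing_labels": ["polite"],
        }
    ],
)
print(evaluation.id)

# EvalListParams (defined further below) supports cursor pagination and ordering.
for ev in client.evals.list(limit=10, order="desc", order_by="updated_at"):
    print(ev.id, ev.name)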
+ +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalCreateResponse", + "DataSourceConfig", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type") +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalCreateResponse(BaseModel): + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_custom_data_source_config.py b/src/openai/types/eval_custom_data_source_config.py new file mode 100644 index 0000000000..d99701cc71 --- /dev/null +++ b/src/openai/types/eval_custom_data_source_config.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["EvalCustomDataSourceConfig"] + + +class EvalCustomDataSourceConfig(BaseModel): + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["custom"] + """The type of data source. 
Always `custom`.""" diff --git a/src/openai/types/eval_delete_response.py b/src/openai/types/eval_delete_response.py new file mode 100644 index 0000000000..a27261e242 --- /dev/null +++ b/src/openai/types/eval_delete_response.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["EvalDeleteResponse"] + + +class EvalDeleteResponse(BaseModel): + deleted: bool + + eval_id: str + + object: str diff --git a/src/openai/types/eval_list_params.py b/src/openai/types/eval_list_params.py new file mode 100644 index 0000000000..d9a12d0ddf --- /dev/null +++ b/src/openai/types/eval_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["EvalListParams"] + + +class EvalListParams(TypedDict, total=False): + after: str + """Identifier for the last eval from the previous pagination request.""" + + limit: int + """Number of evals to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for evals by timestamp. + + Use `asc` for ascending order or `desc` for descending order. + """ + + order_by: Literal["created_at", "updated_at"] + """Evals can be ordered by creation time or last updated time. + + Use `created_at` for creation time or `updated_at` for last updated time. + """ diff --git a/src/openai/types/eval_list_response.py b/src/openai/types/eval_list_response.py new file mode 100644 index 0000000000..b743f57f6a --- /dev/null +++ b/src/openai/types/eval_list_response.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalListResponse", + "DataSourceConfig", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type") +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalListResponse(BaseModel): + id: str + """Unique identifier 
for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_retrieve_response.py b/src/openai/types/eval_retrieve_response.py new file mode 100644 index 0000000000..dabb20674e --- /dev/null +++ b/src/openai/types/eval_retrieve_response.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalRetrieveResponse", + "DataSourceConfig", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type") +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalRetrieveResponse(BaseModel): + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_stored_completions_data_source_config.py b/src/openai/types/eval_stored_completions_data_source_config.py new file mode 100644 index 0000000000..98f86a4719 --- /dev/null +++ b/src/openai/types/eval_stored_completions_data_source_config.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel +from .shared.metadata import Metadata + +__all__ = ["EvalStoredCompletionsDataSourceConfig"] + + +class EvalStoredCompletionsDataSourceConfig(BaseModel): + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["stored_completions"] + """The type of data source. Always `stored_completions`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/eval_update_params.py b/src/openai/types/eval_update_params.py new file mode 100644 index 0000000000..042db29af5 --- /dev/null +++ b/src/openai/types/eval_update_params.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import TypedDict + +from .shared_params.metadata import Metadata + +__all__ = ["EvalUpdateParams"] + + +class EvalUpdateParams(TypedDict, total=False): + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """Rename the evaluation.""" diff --git a/src/openai/types/eval_update_response.py b/src/openai/types/eval_update_response.py new file mode 100644 index 0000000000..c5cb2622ea --- /dev/null +++ b/src/openai/types/eval_update_response.py @@ -0,0 +1,85 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalUpdateResponse", + "DataSourceConfig", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type") +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalUpdateResponse(BaseModel): + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/evals/__init__.py b/src/openai/types/evals/__init__.py new file mode 100644 index 0000000000..ebf84c6b8d --- /dev/null +++ b/src/openai/types/evals/__init__.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .eval_api_error import EvalAPIError as EvalAPIError +from .run_list_params import RunListParams as RunListParams +from .run_create_params import RunCreateParams as RunCreateParams +from .run_list_response import RunListResponse as RunListResponse +from .run_cancel_response import RunCancelResponse as RunCancelResponse +from .run_create_response import RunCreateResponse as RunCreateResponse +from .run_delete_response import RunDeleteResponse as RunDeleteResponse +from .run_retrieve_response import RunRetrieveResponse as RunRetrieveResponse +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import ( + CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource, +) +from .create_eval_jsonl_run_data_source_param import ( + CreateEvalJSONLRunDataSourceParam as CreateEvalJSONLRunDataSourceParam, +) +from .create_eval_completions_run_data_source_param import ( + CreateEvalCompletionsRunDataSourceParam as CreateEvalCompletionsRunDataSourceParam, +) diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py new file mode 100644 index 0000000000..29c687b542 --- /dev/null +++ b/src/openai/types/evals/create_eval_completions_run_data_source.py @@ -0,0 +1,166 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..shared.metadata import Metadata +from ..responses.easy_input_message import EasyInputMessage +from ..responses.response_input_text import ResponseInputText + +__all__ = [ + "CreateEvalCompletionsRunDataSource", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", + "SourceStoredCompletions", + "InputMessages", + "InputMessagesTemplate", + "InputMessagesTemplateTemplate", + "InputMessagesTemplateTemplateMessage", + "InputMessagesTemplateTemplateMessageContent", + "InputMessagesTemplateTemplateMessageContentOutputText", + "InputMessagesItemReference", + "SamplingParams", +] + + +class SourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class SourceFileContent(BaseModel): + content: List[SourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class SourceStoredCompletions(BaseModel): + type: Literal["stored_completions"] + """The type of source. Always `stored_completions`.""" + + created_after: Optional[int] = None + """An optional Unix timestamp to filter items created after this time.""" + + created_before: Optional[int] = None + """An optional Unix timestamp to filter items created before this time.""" + + limit: Optional[int] = None + """An optional maximum number of items to return.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. 
+ + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Optional[str] = None + """An optional model to filter by (e.g., 'gpt-4o').""" + + +Source: TypeAlias = Annotated[ + Union[SourceFileContent, SourceFileID, SourceStoredCompletions], PropertyInfo(discriminator="type") +] + + +class InputMessagesTemplateTemplateMessageContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +InputMessagesTemplateTemplateMessageContent: TypeAlias = Union[ + str, ResponseInputText, InputMessagesTemplateTemplateMessageContentOutputText +] + + +class InputMessagesTemplateTemplateMessage(BaseModel): + content: InputMessagesTemplateTemplateMessageContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +InputMessagesTemplateTemplate: TypeAlias = Annotated[ + Union[EasyInputMessage, InputMessagesTemplateTemplateMessage], PropertyInfo(discriminator="type") +] + + +class InputMessagesTemplate(BaseModel): + template: List[InputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class InputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +InputMessages: TypeAlias = Annotated[ + Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type") +] + + +class SamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class CreateEvalCompletionsRunDataSource(BaseModel): + source: Source + """A StoredCompletionsRunDataSource configuration describing a set of filters""" + + type: Literal["completions"] + """The type of run data source. Always `completions`.""" + + input_messages: Optional[InputMessages] = None + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[SamplingParams] = None diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py new file mode 100644 index 0000000000..c53064ee27 --- /dev/null +++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py @@ -0,0 +1,160 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.metadata import Metadata +from ..responses.easy_input_message_param import EasyInputMessageParam +from ..responses.response_input_text_param import ResponseInputTextParam + +__all__ = [ + "CreateEvalCompletionsRunDataSourceParam", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", + "SourceStoredCompletions", + "InputMessages", + "InputMessagesTemplate", + "InputMessagesTemplateTemplate", + "InputMessagesTemplateTemplateMessage", + "InputMessagesTemplateTemplateMessageContent", + "InputMessagesTemplateTemplateMessageContentOutputText", + "InputMessagesItemReference", + "SamplingParams", +] + + +class SourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class SourceFileContent(TypedDict, total=False): + content: Required[Iterable[SourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. Always `file_id`.""" + + +class SourceStoredCompletions(TypedDict, total=False): + type: Required[Literal["stored_completions"]] + """The type of source. Always `stored_completions`.""" + + created_after: Optional[int] + """An optional Unix timestamp to filter items created after this time.""" + + created_before: Optional[int] + """An optional Unix timestamp to filter items created before this time.""" + + limit: Optional[int] + """An optional maximum number of items to return.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Optional[str] + """An optional model to filter by (e.g., 'gpt-4o').""" + + +Source: TypeAlias = Union[SourceFileContent, SourceFileID, SourceStoredCompletions] + + +class InputMessagesTemplateTemplateMessageContentOutputText(TypedDict, total=False): + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +InputMessagesTemplateTemplateMessageContent: TypeAlias = Union[ + str, ResponseInputTextParam, InputMessagesTemplateTemplateMessageContentOutputText +] + + +class InputMessagesTemplateTemplateMessage(TypedDict, total=False): + content: Required[InputMessagesTemplateTemplateMessageContent] + """Text inputs to the model - can contain template strings.""" + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. 
Always `message`.""" + + +InputMessagesTemplateTemplate: TypeAlias = Union[EasyInputMessageParam, InputMessagesTemplateTemplateMessage] + + +class InputMessagesTemplate(TypedDict, total=False): + template: Required[Iterable[InputMessagesTemplateTemplate]] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Required[Literal["template"]] + """The type of input messages. Always `template`.""" + + +class InputMessagesItemReference(TypedDict, total=False): + item_reference: Required[str] + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Required[Literal["item_reference"]] + """The type of input messages. Always `item_reference`.""" + + +InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference] + + +class SamplingParams(TypedDict, total=False): + max_completion_tokens: int + """The maximum number of tokens in the generated output.""" + + seed: int + """A seed value to initialize the randomness, during sampling.""" + + temperature: float + """A higher temperature increases randomness in the outputs.""" + + top_p: float + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class CreateEvalCompletionsRunDataSourceParam(TypedDict, total=False): + source: Required[Source] + """A StoredCompletionsRunDataSource configuration describing a set of filters""" + + type: Required[Literal["completions"]] + """The type of run data source. Always `completions`.""" + + input_messages: InputMessages + + model: str + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: SamplingParams diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source.py b/src/openai/types/evals/create_eval_jsonl_run_data_source.py new file mode 100644 index 0000000000..d2be56243b --- /dev/null +++ b/src/openai/types/evals/create_eval_jsonl_run_data_source.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["CreateEvalJSONLRunDataSource", "Source", "SourceFileContent", "SourceFileContentContent", "SourceFileID"] + + +class SourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class SourceFileContent(BaseModel): + content: List[SourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +Source: TypeAlias = Annotated[Union[SourceFileContent, SourceFileID], PropertyInfo(discriminator="type")] + + +class CreateEvalJSONLRunDataSource(BaseModel): + source: Source + + type: Literal["jsonl"] + """The type of data source. Always `jsonl`.""" diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py new file mode 100644 index 0000000000..b8ba48a666 --- /dev/null +++ b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "CreateEvalJSONLRunDataSourceParam", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", +] + + +class SourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class SourceFileContent(TypedDict, total=False): + content: Required[Iterable[SourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. Always `file_id`.""" + + +Source: TypeAlias = Union[SourceFileContent, SourceFileID] + + +class CreateEvalJSONLRunDataSourceParam(TypedDict, total=False): + source: Required[Source] + + type: Required[Literal["jsonl"]] + """The type of data source. Always `jsonl`.""" diff --git a/src/openai/types/evals/eval_api_error.py b/src/openai/types/evals/eval_api_error.py new file mode 100644 index 0000000000..fe76871024 --- /dev/null +++ b/src/openai/types/evals/eval_api_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["EvalAPIError"] + + +class EvalAPIError(BaseModel): + code: str + """The error code.""" + + message: str + """The error message.""" diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py new file mode 100644 index 0000000000..eb6d689fc3 --- /dev/null +++ b/src/openai/types/evals/run_cancel_response.py @@ -0,0 +1,327 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..shared.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunCancelResponse", + "DataSource", + "DataSourceCompletions", + "DataSourceCompletionsSource", + "DataSourceCompletionsSourceFileContent", + "DataSourceCompletionsSourceFileContentContent", + "DataSourceCompletionsSourceFileID", + "DataSourceCompletionsSourceResponses", + "DataSourceCompletionsInputMessages", + "DataSourceCompletionsInputMessagesTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCompletionsInputMessagesItemReference", + "DataSourceCompletionsSamplingParams", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceCompletionsSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceCompletionsSourceFileContent(BaseModel): + content: List[DataSourceCompletionsSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceCompletionsSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceCompletionsSourceResponses(BaseModel): + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + allow_parallel_tool_calls: Optional[bool] = None + """Whether to allow parallel tool calls. + + This is a query parameter used to select responses. + """ + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + has_tool_calls: Optional[bool] = None + """Whether the response has tool calls. + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional search string for instructions. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """Optional reasoning effort parameter. + + This is a query parameter used to select responses. + """ + + temperature: Optional[float] = None + """Sampling temperature. 
This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCompletionsSource: TypeAlias = Annotated[ + Union[ + DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses + ], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel): + content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCompletionsInputMessagesTemplateTemplateChatMessage, + DataSourceCompletionsInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCompletionsInputMessagesTemplate(BaseModel): + template: List[DataSourceCompletionsInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceCompletionsInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCompletionsInputMessages: TypeAlias = Annotated[ + Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCompletions(BaseModel): + source: DataSourceCompletionsSource + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["completions"] + """The type of run data source. 
Always `completions`.""" + + input_messages: Optional[DataSourceCompletionsInputMessages] = None + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[DataSourceCompletionsSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunCancelResponse(BaseModel): + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py new file mode 100644 index 0000000000..0c9720ea7a --- /dev/null +++ b/src/openai/types/evals/run_create_params.py @@ -0,0 +1,247 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text_param import ResponseInputTextParam +from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam +from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam + +__all__ = [ + "RunCreateParams", + "DataSource", + "DataSourceCreateEvalResponsesRunDataSource", + "DataSourceCreateEvalResponsesRunDataSourceSource", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileContent", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileID", + "DataSourceCreateEvalResponsesRunDataSourceSourceResponses", + "DataSourceCreateEvalResponsesRunDataSourceInputMessages", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference", + "DataSourceCreateEvalResponsesRunDataSourceSamplingParams", +] + + +class RunCreateParams(TypedDict, total=False): + data_source: Required[DataSource] + """Details about the run's data source.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the run.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileContent(TypedDict, total=False): + content: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total=False): + type: Required[Literal["responses"]] + """The type of run data source. Always `responses`.""" + + allow_parallel_tool_calls: Optional[bool] + """Whether to allow parallel tool calls. + + This is a query parameter used to select responses. + """ + + created_after: Optional[int] + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. 
+ """ + + created_before: Optional[int] + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + has_tool_calls: Optional[bool] + """Whether the response has tool calls. + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] + """Optional search string for instructions. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] + """Optional reasoning effort parameter. + + This is a query parameter used to select responses. + """ + + temperature: Optional[float] + """Sampling temperature. This is a query parameter used to select responses.""" + + top_p: Optional[float] + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCreateEvalResponsesRunDataSourceSource: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceSourceFileContent, + DataSourceCreateEvalResponsesRunDataSourceSourceFileID, + DataSourceCreateEvalResponsesRunDataSourceSourceResponses, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[str] + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText( + TypedDict, total=False +): + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem(TypedDict, total=False): + content: Required[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent] + """Text inputs to the model - can contain template strings.""" + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate(TypedDict, total=False): + template: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate]] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Required[Literal["template"]] + """The type of input messages. 
Always `template`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference(TypedDict, total=False): + item_reference: Required[str] + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Required[Literal["item_reference"]] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCreateEvalResponsesRunDataSourceInputMessages: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference, +] + + +class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=False): + max_completion_tokens: int + """The maximum number of tokens in the generated output.""" + + seed: int + """A seed value to initialize the randomness, during sampling.""" + + temperature: float + """A higher temperature increases randomness in the outputs.""" + + top_p: float + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCreateEvalResponsesRunDataSource(TypedDict, total=False): + source: Required[DataSourceCreateEvalResponsesRunDataSourceSource] + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Required[Literal["completions"]] + """The type of run data source. Always `completions`.""" + + input_messages: DataSourceCreateEvalResponsesRunDataSourceInputMessages + + model: str + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: DataSourceCreateEvalResponsesRunDataSourceSamplingParams + + +DataSource: TypeAlias = Union[ + CreateEvalJSONLRunDataSourceParam, + CreateEvalCompletionsRunDataSourceParam, + DataSourceCreateEvalResponsesRunDataSource, +] diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py new file mode 100644 index 0000000000..459399511c --- /dev/null +++ b/src/openai/types/evals/run_create_response.py @@ -0,0 +1,327 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..shared.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunCreateResponse", + "DataSource", + "DataSourceCompletions", + "DataSourceCompletionsSource", + "DataSourceCompletionsSourceFileContent", + "DataSourceCompletionsSourceFileContentContent", + "DataSourceCompletionsSourceFileID", + "DataSourceCompletionsSourceResponses", + "DataSourceCompletionsInputMessages", + "DataSourceCompletionsInputMessagesTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCompletionsInputMessagesItemReference", + "DataSourceCompletionsSamplingParams", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceCompletionsSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceCompletionsSourceFileContent(BaseModel): + content: List[DataSourceCompletionsSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceCompletionsSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceCompletionsSourceResponses(BaseModel): + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + allow_parallel_tool_calls: Optional[bool] = None + """Whether to allow parallel tool calls. + + This is a query parameter used to select responses. + """ + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + has_tool_calls: Optional[bool] = None + """Whether the response has tool calls. + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional search string for instructions. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """Optional reasoning effort parameter. + + This is a query parameter used to select responses. + """ + + temperature: Optional[float] = None + """Sampling temperature. 
This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCompletionsSource: TypeAlias = Annotated[ + Union[ + DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses + ], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel): + content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCompletionsInputMessagesTemplateTemplateChatMessage, + DataSourceCompletionsInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCompletionsInputMessagesTemplate(BaseModel): + template: List[DataSourceCompletionsInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceCompletionsInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCompletionsInputMessages: TypeAlias = Annotated[ + Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCompletions(BaseModel): + source: DataSourceCompletionsSource + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["completions"] + """The type of run data source. 
Always `completions`.""" + + input_messages: Optional[DataSourceCompletionsInputMessages] = None + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[DataSourceCompletionsSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunCreateResponse(BaseModel): + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_delete_response.py b/src/openai/types/evals/run_delete_response.py new file mode 100644 index 0000000000..d48d01f86c --- /dev/null +++ b/src/openai/types/evals/run_delete_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["RunDeleteResponse"] + + +class RunDeleteResponse(BaseModel): + deleted: Optional[bool] = None + + object: Optional[str] = None + + run_id: Optional[str] = None diff --git a/src/openai/types/evals/run_list_params.py b/src/openai/types/evals/run_list_params.py new file mode 100644 index 0000000000..383b89d85c --- /dev/null +++ b/src/openai/types/evals/run_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["RunListParams"] + + +class RunListParams(TypedDict, total=False): + after: str + """Identifier for the last run from the previous pagination request.""" + + limit: int + """Number of runs to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for runs by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ + + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] + """Filter runs by status. + + One of `queued` | `in_progress` | `failed` | `completed` | `canceled`. + """ diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py new file mode 100644 index 0000000000..278ceeabed --- /dev/null +++ b/src/openai/types/evals/run_list_response.py @@ -0,0 +1,327 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..shared.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunListResponse", + "DataSource", + "DataSourceCompletions", + "DataSourceCompletionsSource", + "DataSourceCompletionsSourceFileContent", + "DataSourceCompletionsSourceFileContentContent", + "DataSourceCompletionsSourceFileID", + "DataSourceCompletionsSourceResponses", + "DataSourceCompletionsInputMessages", + "DataSourceCompletionsInputMessagesTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCompletionsInputMessagesItemReference", + "DataSourceCompletionsSamplingParams", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceCompletionsSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceCompletionsSourceFileContent(BaseModel): + content: List[DataSourceCompletionsSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. 
Always `file_content`.""" + + +class DataSourceCompletionsSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceCompletionsSourceResponses(BaseModel): + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + allow_parallel_tool_calls: Optional[bool] = None + """Whether to allow parallel tool calls. + + This is a query parameter used to select responses. + """ + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + has_tool_calls: Optional[bool] = None + """Whether the response has tool calls. + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional search string for instructions. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """Optional reasoning effort parameter. + + This is a query parameter used to select responses. + """ + + temperature: Optional[float] = None + """Sampling temperature. This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCompletionsSource: TypeAlias = Annotated[ + Union[ + DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses + ], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel): + content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. 
Always `message`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCompletionsInputMessagesTemplateTemplateChatMessage, + DataSourceCompletionsInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCompletionsInputMessagesTemplate(BaseModel): + template: List[DataSourceCompletionsInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceCompletionsInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCompletionsInputMessages: TypeAlias = Annotated[ + Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCompletions(BaseModel): + source: DataSourceCompletionsSource + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["completions"] + """The type of run data source. Always `completions`.""" + + input_messages: Optional[DataSourceCompletionsInputMessages] = None + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. 
"o3-mini").""" + + sampling_params: Optional[DataSourceCompletionsSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunListResponse(BaseModel): + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py new file mode 100644 index 0000000000..e142f31b14 --- /dev/null +++ b/src/openai/types/evals/run_retrieve_response.py @@ -0,0 +1,327 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..shared.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunRetrieveResponse", + "DataSource", + "DataSourceCompletions", + "DataSourceCompletionsSource", + "DataSourceCompletionsSourceFileContent", + "DataSourceCompletionsSourceFileContentContent", + "DataSourceCompletionsSourceFileID", + "DataSourceCompletionsSourceResponses", + "DataSourceCompletionsInputMessages", + "DataSourceCompletionsInputMessagesTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplate", + "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCompletionsInputMessagesItemReference", + "DataSourceCompletionsSamplingParams", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceCompletionsSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceCompletionsSourceFileContent(BaseModel): + content: List[DataSourceCompletionsSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceCompletionsSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceCompletionsSourceResponses(BaseModel): + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + allow_parallel_tool_calls: Optional[bool] = None + """Whether to allow parallel tool calls. + + This is a query parameter used to select responses. + """ + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + has_tool_calls: Optional[bool] = None + """Whether the response has tool calls. + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional search string for instructions. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """Optional reasoning effort parameter. + + This is a query parameter used to select responses. + """ + + temperature: Optional[float] = None + """Sampling temperature. 
This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCompletionsSource: TypeAlias = Annotated[ + Union[ + DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses + ], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText +] + + +class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel): + content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCompletionsInputMessagesTemplateTemplateChatMessage, + DataSourceCompletionsInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCompletionsInputMessagesTemplate(BaseModel): + template: List[DataSourceCompletionsInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the "item" namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceCompletionsInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the "item" namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCompletionsInputMessages: TypeAlias = Annotated[ + Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceCompletionsSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCompletions(BaseModel): + source: DataSourceCompletionsSource + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["completions"] + """The type of run data source. 
Always `completions`.""" + + input_messages: Optional[DataSourceCompletionsInputMessages] = None + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[DataSourceCompletionsSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunRetrieveResponse(BaseModel): + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/runs/__init__.py b/src/openai/types/evals/runs/__init__.py new file mode 100644 index 0000000000..b77cbb6acd --- /dev/null +++ b/src/openai/types/evals/runs/__init__.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .output_item_list_params import OutputItemListParams as OutputItemListParams +from .output_item_list_response import OutputItemListResponse as OutputItemListResponse +from .output_item_retrieve_response import OutputItemRetrieveResponse as OutputItemRetrieveResponse diff --git a/src/openai/types/evals/runs/output_item_list_params.py b/src/openai/types/evals/runs/output_item_list_params.py new file mode 100644 index 0000000000..073bfc69a7 --- /dev/null +++ b/src/openai/types/evals/runs/output_item_list_params.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["OutputItemListParams"] + + +class OutputItemListParams(TypedDict, total=False): + eval_id: Required[str] + + after: str + """Identifier for the last output item from the previous pagination request.""" + + limit: int + """Number of output items to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for output items by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ + + status: Literal["fail", "pass"] + """Filter output items by status. + + Use `failed` to filter by failed output items or `pass` to filter by passed + output items. + """ diff --git a/src/openai/types/evals/runs/output_item_list_response.py b/src/openai/types/evals/runs/output_item_list_response.py new file mode 100644 index 0000000000..72b1049f7b --- /dev/null +++ b/src/openai/types/evals/runs/output_item_list_response.py @@ -0,0 +1,104 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import builtins +from typing import Dict, List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ..eval_api_error import EvalAPIError + +__all__ = ["OutputItemListResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"] + + +class SampleInput(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message sender (e.g., system, user, developer).""" + + +class SampleOutput(BaseModel): + content: Optional[str] = None + """The content of the message.""" + + role: Optional[str] = None + """The role of the message (e.g. 
"system", "assistant", "user").""" + + +class SampleUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class Sample(BaseModel): + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + finish_reason: str + """The reason why the sample generation was finished.""" + + input: List[SampleInput] + """An array of input messages.""" + + max_completion_tokens: int + """The maximum number of tokens allowed for completion.""" + + model: str + """The model used for generating the sample.""" + + output: List[SampleOutput] + """An array of output messages.""" + + seed: int + """The seed used for generating the sample.""" + + temperature: float + """The sampling temperature used.""" + + top_p: float + """The top_p value used for sampling.""" + + usage: SampleUsage + """Token usage details for the sample.""" + + +class OutputItemListResponse(BaseModel): + id: str + """Unique identifier for the evaluation run output item.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + datasource_item: Dict[str, object] + """Details of the input data source item.""" + + datasource_item_id: int + """The identifier for the data source item.""" + + eval_id: str + """The identifier of the evaluation group.""" + + object: Literal["eval.run.output_item"] + """The type of the object. Always "eval.run.output_item".""" + + results: List[Dict[str, builtins.object]] + """A list of results from the evaluation run.""" + + run_id: str + """The identifier of the evaluation run associated with this output item.""" + + sample: Sample + """A sample containing the input and output of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/runs/output_item_retrieve_response.py b/src/openai/types/evals/runs/output_item_retrieve_response.py new file mode 100644 index 0000000000..63aab5565f --- /dev/null +++ b/src/openai/types/evals/runs/output_item_retrieve_response.py @@ -0,0 +1,104 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import builtins +from typing import Dict, List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ..eval_api_error import EvalAPIError + +__all__ = ["OutputItemRetrieveResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"] + + +class SampleInput(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message sender (e.g., system, user, developer).""" + + +class SampleOutput(BaseModel): + content: Optional[str] = None + """The content of the message.""" + + role: Optional[str] = None + """The role of the message (e.g. 
"system", "assistant", "user").""" + + +class SampleUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class Sample(BaseModel): + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + finish_reason: str + """The reason why the sample generation was finished.""" + + input: List[SampleInput] + """An array of input messages.""" + + max_completion_tokens: int + """The maximum number of tokens allowed for completion.""" + + model: str + """The model used for generating the sample.""" + + output: List[SampleOutput] + """An array of output messages.""" + + seed: int + """The seed used for generating the sample.""" + + temperature: float + """The sampling temperature used.""" + + top_p: float + """The top_p value used for sampling.""" + + usage: SampleUsage + """Token usage details for the sample.""" + + +class OutputItemRetrieveResponse(BaseModel): + id: str + """Unique identifier for the evaluation run output item.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + datasource_item: Dict[str, object] + """Details of the input data source item.""" + + datasource_item_id: int + """The identifier for the data source item.""" + + eval_id: str + """The identifier of the evaluation group.""" + + object: Literal["eval.run.output_item"] + """The type of the object. Always "eval.run.output_item".""" + + results: List[Dict[str, builtins.object]] + """A list of results from the evaluation run.""" + + run_id: str + """The identifier of the evaluation run associated with this output item.""" + + sample: Sample + """A sample containing the input and output of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/file_chunking_strategy.py b/src/openai/types/file_chunking_strategy.py new file mode 100644 index 0000000000..ee96bd7884 --- /dev/null +++ b/src/openai/types/file_chunking_strategy.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from .._utils import PropertyInfo +from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject + +__all__ = ["FileChunkingStrategy"] + +FileChunkingStrategy: TypeAlias = Annotated[ + Union[StaticFileChunkingStrategyObject, OtherFileChunkingStrategyObject], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/file_chunking_strategy_param.py b/src/openai/types/file_chunking_strategy_param.py new file mode 100644 index 0000000000..25d94286d8 --- /dev/null +++ b/src/openai/types/file_chunking_strategy_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam +from .static_file_chunking_strategy_object_param import StaticFileChunkingStrategyObjectParam + +__all__ = ["FileChunkingStrategyParam"] + +FileChunkingStrategyParam: TypeAlias = Union[AutoFileChunkingStrategyParam, StaticFileChunkingStrategyObjectParam] diff --git a/src/openai/types/file_content.py b/src/openai/types/file_content.py index 92b316b9eb..d89eee623e 100644 --- a/src/openai/types/file_content.py +++ b/src/openai/types/file_content.py @@ -1,6 +1,7 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing_extensions import TypeAlias __all__ = ["FileContent"] -FileContent = str +FileContent: TypeAlias = str diff --git a/src/openai/types/file_create_params.py b/src/openai/types/file_create_params.py index a59ddb2817..728dfd350f 100644 --- a/src/openai/types/file_create_params.py +++ b/src/openai/types/file_create_params.py @@ -1,10 +1,11 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Required, TypedDict from .._types import FileTypes +from .file_purpose import FilePurpose __all__ = ["FileCreateParams"] @@ -13,13 +14,11 @@ class FileCreateParams(TypedDict, total=False): file: Required[FileTypes] """The File object (not file name) to be uploaded.""" - purpose: Required[Literal["fine-tune", "assistants"]] + purpose: Required[FilePurpose] """The intended purpose of the uploaded file. - Use "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and - "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Messages](https://platform.openai.com/docs/api-reference/messages). This allows - us to validate the format of the uploaded file is correct for fine-tuning. + One of: - `assistants`: Used in the Assistants API - `batch`: Used in the Batch + API - `fine-tune`: Used for fine-tuning - `vision`: Images used for vision + fine-tuning - `user_data`: Flexible file type for any purpose - `evals`: Used + for eval data sets """ diff --git a/src/openai/types/file_deleted.py b/src/openai/types/file_deleted.py index 3ac8592ff6..f25fa87a8d 100644 --- a/src/openai/types/file_deleted.py +++ b/src/openai/types/file_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/file_list_params.py b/src/openai/types/file_list_params.py index a962dd239c..058d874c29 100644 --- a/src/openai/types/file_list_params.py +++ b/src/openai/types/file_list_params.py @@ -1,12 +1,33 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["FileListParams"] class FileListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. 
For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 10,000, and the default is 10,000. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ + purpose: str """Only return files with the given purpose.""" diff --git a/src/openai/types/file_object.py b/src/openai/types/file_object.py index 4ae91b754e..1d65e6987d 100644 --- a/src/openai/types/file_object.py +++ b/src/openai/types/file_object.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional from typing_extensions import Literal @@ -24,11 +24,13 @@ class FileObject(BaseModel): object: Literal["file"] """The object type, which is always `file`.""" - purpose: Literal["fine-tune", "fine-tune-results", "assistants", "assistants_output"] + purpose: Literal[ + "assistants", "assistants_output", "batch", "batch_output", "fine-tune", "fine-tune-results", "vision" + ] """The intended purpose of the file. - Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and - `assistants_output`. + Supported values are `assistants`, `assistants_output`, `batch`, `batch_output`, + `fine-tune`, `fine-tune-results` and `vision`. """ status: Literal["uploaded", "processed", "error"] @@ -38,6 +40,9 @@ class FileObject(BaseModel): `error`. """ + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the file will expire.""" + status_details: Optional[str] = None """Deprecated. diff --git a/src/openai/types/file_purpose.py b/src/openai/types/file_purpose.py new file mode 100644 index 0000000000..b2c2d5f9fc --- /dev/null +++ b/src/openai/types/file_purpose.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["FilePurpose"] + +FilePurpose: TypeAlias = Literal["assistants", "batch", "fine-tune", "vision", "user_data", "evals"] diff --git a/src/openai/types/fine_tune.py b/src/openai/types/fine_tune.py deleted file mode 100644 index de1e097ee4..0000000000 --- a/src/openai/types/fine_tune.py +++ /dev/null @@ -1,94 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing import List, Optional -from typing_extensions import Literal - -from .._models import BaseModel -from .file_object import FileObject -from .fine_tune_event import FineTuneEvent - -__all__ = ["FineTune", "Hyperparams"] - - -class Hyperparams(BaseModel): - batch_size: int - """The batch size to use for training. - - The batch size is the number of training examples used to train a single forward - and backward pass. - """ - - learning_rate_multiplier: float - """The learning rate multiplier to use for training.""" - - n_epochs: int - """The number of epochs to train the model for. - - An epoch refers to one full cycle through the training dataset. 
- """ - - prompt_loss_weight: float - """The weight to use for loss on the prompt tokens.""" - - classification_n_classes: Optional[int] = None - """The number of classes to use for computing classification metrics.""" - - classification_positive_class: Optional[str] = None - """The positive class to use for computing classification metrics.""" - - compute_classification_metrics: Optional[bool] = None - """ - The classification metrics to compute using the validation dataset at the end of - every epoch. - """ - - -class FineTune(BaseModel): - id: str - """The object identifier, which can be referenced in the API endpoints.""" - - created_at: int - """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" - - fine_tuned_model: Optional[str] - """The name of the fine-tuned model that is being created.""" - - hyperparams: Hyperparams - """The hyperparameters used for the fine-tuning job. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/hyperparameters) - for more details. - """ - - model: str - """The base model that is being fine-tuned.""" - - object: Literal["fine-tune"] - """The object type, which is always "fine-tune".""" - - organization_id: str - """The organization that owns the fine-tuning job.""" - - result_files: List[FileObject] - """The compiled results files for the fine-tuning job.""" - - status: str - """ - The current status of the fine-tuning job, which can be either `created`, - `running`, `succeeded`, `failed`, or `cancelled`. - """ - - training_files: List[FileObject] - """The list of files used for training.""" - - updated_at: int - """The Unix timestamp (in seconds) for when the fine-tuning job was last updated.""" - - validation_files: List[FileObject] - """The list of files used for validation.""" - - events: Optional[List[FineTuneEvent]] = None - """ - The list of events that have been observed in the lifecycle of the FineTune job. - """ diff --git a/src/openai/types/fine_tune_create_params.py b/src/openai/types/fine_tune_create_params.py deleted file mode 100644 index 1be9c9ea04..0000000000 --- a/src/openai/types/fine_tune_create_params.py +++ /dev/null @@ -1,140 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import List, Union, Optional -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["FineTuneCreateParams", "Hyperparameters"] - - -class FineTuneCreateParams(TypedDict, total=False): - training_file: Required[str] - """The ID of an uploaded file that contains training data. - - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) - for how to upload a file. - - Your dataset must be formatted as a JSONL file, where each training example is a - JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - """ - - batch_size: Optional[int] - """The batch size to use for training. - - The batch size is the number of training examples used to train a single forward - and backward pass. - - By default, the batch size will be dynamically configured to be ~0.2% of the - number of examples in the training set, capped at 256 - in general, we've found - that larger batch sizes tend to work better for larger datasets. 
- """ - - classification_betas: Optional[List[float]] - """If this is provided, we calculate F-beta scores at the specified beta values. - - The F-beta score is a generalization of F-1 score. This is only used for binary - classification. - - With a beta of 1 (i.e. the F-1 score), precision and recall are given the same - weight. A larger beta score puts more weight on recall and less on precision. A - smaller beta score puts more weight on precision and less on recall. - """ - - classification_n_classes: Optional[int] - """The number of classes in a classification task. - - This parameter is required for multiclass classification. - """ - - classification_positive_class: Optional[str] - """The positive class in binary classification. - - This parameter is needed to generate precision, recall, and F1 metrics when - doing binary classification. - """ - - compute_classification_metrics: Optional[bool] - """ - If set, we calculate classification-specific metrics such as accuracy and F-1 - score using the validation set at the end of every epoch. These metrics can be - viewed in the - [results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). - - In order to compute classification metrics, you must provide a - `validation_file`. Additionally, you must specify `classification_n_classes` for - multiclass classification or `classification_positive_class` for binary - classification. - """ - - hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" - - learning_rate_multiplier: Optional[float] - """ - The learning rate multiplier to use for training. The fine-tuning learning rate - is the original learning rate used for pretraining multiplied by this value. - - By default, the learning rate multiplier is the 0.05, 0.1, or 0.2 depending on - final `batch_size` (larger learning rates tend to perform better with larger - batch sizes). We recommend experimenting with values in the range 0.02 to 0.2 to - see what produces the best results. - """ - - model: Union[str, Literal["ada", "babbage", "curie", "davinci"], None] - """The name of the base model to fine-tune. - - You can select one of "ada", "babbage", "curie", "davinci", or a fine-tuned - model created after 2022-04-21 and before 2023-08-22. To learn more about these - models, see the [Models](https://platform.openai.com/docs/models) documentation. - """ - - prompt_loss_weight: Optional[float] - """The weight to use for loss on the prompt tokens. - - This controls how much the model tries to learn to generate the prompt (as - compared to the completion which always has a weight of 1.0), and can add a - stabilizing effect to training when completions are short. - - If prompts are extremely long (relative to completions), it may make sense to - reduce this weight so as to avoid over-prioritizing learning the prompt. - """ - - suffix: Optional[str] - """ - A string of up to 40 characters that will be added to your fine-tuned model - name. - - For example, a `suffix` of "custom-model-name" would produce a model name like - `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. - """ - - validation_file: Optional[str] - """The ID of an uploaded file that contains validation data. - - If you provide this file, the data is used to generate validation metrics - periodically during fine-tuning. These metrics can be viewed in the - [fine-tuning results file](https://platform.openai.com/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). 
- Your train and validation data should be mutually exclusive. - - Your dataset must be formatted as a JSONL file, where each validation example is - a JSON object with the keys "prompt" and "completion". Additionally, you must - upload your file with the purpose `fine-tune`. - - See the - [fine-tuning guide](https://platform.openai.com/docs/guides/legacy-fine-tuning/creating-training-data) - for more details. - """ - - -class Hyperparameters(TypedDict, total=False): - n_epochs: Union[Literal["auto"], int] - """The number of epochs to train the model for. - - An epoch refers to one full cycle through the training dataset. - """ diff --git a/src/openai/types/fine_tune_event.py b/src/openai/types/fine_tune_event.py deleted file mode 100644 index 299f0de24b..0000000000 --- a/src/openai/types/fine_tune_event.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["FineTuneEvent"] - - -class FineTuneEvent(BaseModel): - created_at: int - - level: str - - message: str - - object: Literal["fine-tune-event"] diff --git a/src/openai/types/fine_tune_events_list_response.py b/src/openai/types/fine_tune_events_list_response.py deleted file mode 100644 index c69746104d..0000000000 --- a/src/openai/types/fine_tune_events_list_response.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from typing import List -from typing_extensions import Literal - -from .._models import BaseModel -from .fine_tune_event import FineTuneEvent - -__all__ = ["FineTuneEventsListResponse"] - - -class FineTuneEventsListResponse(BaseModel): - data: List[FineTuneEvent] - - object: Literal["list"] diff --git a/src/openai/types/fine_tune_list_events_params.py b/src/openai/types/fine_tune_list_events_params.py deleted file mode 100644 index 1f23b108e6..0000000000 --- a/src/openai/types/fine_tune_list_events_params.py +++ /dev/null @@ -1,41 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["FineTuneListEventsParamsBase", "FineTuneListEventsParamsNonStreaming", "FineTuneListEventsParamsStreaming"] - - -class FineTuneListEventsParamsBase(TypedDict, total=False): - pass - - -class FineTuneListEventsParamsNonStreaming(FineTuneListEventsParamsBase): - stream: Literal[False] - """Whether to stream events for the fine-tune job. - - If set to true, events will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. - """ - - -class FineTuneListEventsParamsStreaming(FineTuneListEventsParamsBase): - stream: Required[Literal[True]] - """Whether to stream events for the fine-tune job. - - If set to true, events will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available. The stream will terminate with a `data: [DONE]` - message when the job is finished (succeeded, cancelled, or failed). - - If set to false, only events generated so far will be returned. 
- """ - - -FineTuneListEventsParams = Union[FineTuneListEventsParamsNonStreaming, FineTuneListEventsParamsStreaming] diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py index d24160c5bd..cc664eacea 100644 --- a/src/openai/types/fine_tuning/__init__.py +++ b/src/openai/types/fine_tuning/__init__.py @@ -1,9 +1,26 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +from .dpo_method import DpoMethod as DpoMethod from .fine_tuning_job import FineTuningJob as FineTuningJob from .job_list_params import JobListParams as JobListParams +from .dpo_method_param import DpoMethodParam as DpoMethodParam from .job_create_params import JobCreateParams as JobCreateParams +from .supervised_method import SupervisedMethod as SupervisedMethod +from .dpo_hyperparameters import DpoHyperparameters as DpoHyperparameters +from .reinforcement_method import ReinforcementMethod as ReinforcementMethod from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent from .job_list_events_params import JobListEventsParams as JobListEventsParams +from .supervised_method_param import SupervisedMethodParam as SupervisedMethodParam +from .dpo_hyperparameters_param import DpoHyperparametersParam as DpoHyperparametersParam +from .reinforcement_method_param import ReinforcementMethodParam as ReinforcementMethodParam +from .supervised_hyperparameters import SupervisedHyperparameters as SupervisedHyperparameters +from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration +from .reinforcement_hyperparameters import ReinforcementHyperparameters as ReinforcementHyperparameters +from .supervised_hyperparameters_param import SupervisedHyperparametersParam as SupervisedHyperparametersParam +from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration +from .reinforcement_hyperparameters_param import ReinforcementHyperparametersParam as ReinforcementHyperparametersParam +from .fine_tuning_job_wandb_integration_object import ( + FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject, +) diff --git a/src/openai/types/fine_tuning/alpha/__init__.py b/src/openai/types/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..6394961b0b --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/__init__.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .grader_run_params import GraderRunParams as GraderRunParams +from .grader_run_response import GraderRunResponse as GraderRunResponse +from .grader_validate_params import GraderValidateParams as GraderValidateParams +from .grader_validate_response import GraderValidateResponse as GraderValidateResponse diff --git a/src/openai/types/fine_tuning/alpha/grader_run_params.py b/src/openai/types/fine_tuning/alpha/grader_run_params.py new file mode 100644 index 0000000000..fa729f55ba --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_run_params.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Required, TypeAlias, TypedDict + +from ...graders.multi_grader_param import MultiGraderParam +from ...graders.python_grader_param import PythonGraderParam +from ...graders.score_model_grader_param import ScoreModelGraderParam +from ...graders.string_check_grader_param import StringCheckGraderParam +from ...graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["GraderRunParams", "Grader"] + + +class GraderRunParams(TypedDict, total=False): + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + model_sample: Required[str] + """The model sample to be evaluated.""" + + reference_answer: Required[Union[str, Iterable[object], float, object]] + """The reference answer for the evaluation.""" + + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] diff --git a/src/openai/types/fine_tuning/alpha/grader_run_response.py b/src/openai/types/fine_tuning/alpha/grader_run_response.py new file mode 100644 index 0000000000..8ef046d133 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_run_response.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional + +from pydantic import Field as FieldInfo + +from ...._models import BaseModel + +__all__ = ["GraderRunResponse", "Metadata", "MetadataErrors"] + + +class MetadataErrors(BaseModel): + formula_parse_error: bool + + invalid_variable_error: bool + + api_model_grader_parse_error: bool = FieldInfo(alias="model_grader_parse_error") + + api_model_grader_refusal_error: bool = FieldInfo(alias="model_grader_refusal_error") + + api_model_grader_server_error: bool = FieldInfo(alias="model_grader_server_error") + + api_model_grader_server_error_details: Optional[str] = FieldInfo( + alias="model_grader_server_error_details", default=None + ) + + other_error: bool + + python_grader_runtime_error: bool + + python_grader_runtime_error_details: Optional[str] = None + + python_grader_server_error: bool + + python_grader_server_error_type: Optional[str] = None + + sample_parse_error: bool + + truncated_observation_error: bool + + unresponsive_reward_error: bool + + +class Metadata(BaseModel): + errors: MetadataErrors + + execution_time: float + + name: str + + sampled_model_name: Optional[str] = None + + scores: Dict[str, object] + + token_usage: Optional[int] = None + + type: str + + +class GraderRunResponse(BaseModel): + metadata: Metadata + + api_model_grader_token_usage_per_model: Dict[str, object] = FieldInfo(alias="model_grader_token_usage_per_model") + + reward: float + + sub_rewards: Dict[str, object] diff --git a/src/openai/types/fine_tuning/alpha/grader_validate_params.py b/src/openai/types/fine_tuning/alpha/grader_validate_params.py new file mode 100644 index 0000000000..fe9eb44e32 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_validate_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Required, TypeAlias, TypedDict + +from ...graders.multi_grader_param import MultiGraderParam +from ...graders.python_grader_param import PythonGraderParam +from ...graders.score_model_grader_param import ScoreModelGraderParam +from ...graders.string_check_grader_param import StringCheckGraderParam +from ...graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["GraderValidateParams", "Grader"] + + +class GraderValidateParams(TypedDict, total=False): + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] diff --git a/src/openai/types/fine_tuning/alpha/grader_validate_response.py b/src/openai/types/fine_tuning/alpha/grader_validate_response.py new file mode 100644 index 0000000000..b373292d80 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_validate_response.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import TypeAlias + +from ...._models import BaseModel +from ...graders.multi_grader import MultiGrader +from ...graders.python_grader import PythonGrader +from ...graders.score_model_grader import ScoreModelGrader +from ...graders.string_check_grader import StringCheckGrader +from ...graders.text_similarity_grader import TextSimilarityGrader + +__all__ = ["GraderValidateResponse", "Grader"] + +Grader: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, MultiGrader] + + +class GraderValidateResponse(BaseModel): + grader: Optional[Grader] = None + """The grader used for the fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/checkpoints/__init__.py b/src/openai/types/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..2947b33145 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/__init__.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .permission_create_params import PermissionCreateParams as PermissionCreateParams +from .permission_create_response import PermissionCreateResponse as PermissionCreateResponse +from .permission_delete_response import PermissionDeleteResponse as PermissionDeleteResponse +from .permission_retrieve_params import PermissionRetrieveParams as PermissionRetrieveParams +from .permission_retrieve_response import PermissionRetrieveResponse as PermissionRetrieveResponse diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_params.py b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py new file mode 100644 index 0000000000..92f98f21b9 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +__all__ = ["PermissionCreateParams"] + + +class PermissionCreateParams(TypedDict, total=False): + project_ids: Required[List[str]] + """The project identifiers to grant access to.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_response.py b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py new file mode 100644 index 0000000000..9bc14c00cc --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionCreateResponse"] + + +class PermissionCreateResponse(BaseModel): + id: str + """The permission identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the permission was created.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" + + project_id: str + """The project identifier that the permission is for.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py new file mode 100644 index 0000000000..1a92d912fa --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionDeleteResponse"] + + +class PermissionDeleteResponse(BaseModel): + id: str + """The ID of the fine-tuned model checkpoint permission that was deleted.""" + + deleted: bool + """Whether the fine-tuned model checkpoint permission was successfully deleted.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py new file mode 100644 index 0000000000..6e66a867ca --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["PermissionRetrieveParams"] + + +class PermissionRetrieveParams(TypedDict, total=False): + after: str + """Identifier for the last permission ID from the previous pagination request.""" + + limit: int + """Number of permissions to retrieve.""" + + order: Literal["ascending", "descending"] + """The order in which to retrieve permissions.""" + + project_id: str + """The ID of the project to get permissions for.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py new file mode 100644 index 0000000000..14c73b55d0 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
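The checkpoint-permission models above correspond to a `permissions` resource nested under `fine_tuning.checkpoints`. A hedged sketch; the `fine_tuned_model_checkpoint` argument name is an assumption taken from the REST path, and the IDs are illustrative:

```python
from openai import OpenAI

client = OpenAI()
checkpoint = "ft:gpt-4o-mini-2024-07-18:my-org:custom:ckpt-abc123"

# Grant another project access to the fine-tuned checkpoint.
client.fine_tuning.checkpoints.permissions.create(
    fine_tuned_model_checkpoint=checkpoint,
    project_ids=["proj_abc123"],
)

# List the permissions that now exist for it.
permissions = client.fine_tuning.checkpoints.permissions.retrieve(
    fine_tuned_model_checkpoint=checkpoint,
    limit=10,
    order="descending",
)
for permission in permissions.data:
    print(permission.id, permission.project_id)
```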
+ +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionRetrieveResponse", "Data"] + + +class Data(BaseModel): + id: str + """The permission identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the permission was created.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" + + project_id: str + """The project identifier that the permission is for.""" + + +class PermissionRetrieveResponse(BaseModel): + data: List[Data] + + has_more: bool + + object: Literal["list"] + + first_id: Optional[str] = None + + last_id: Optional[str] = None diff --git a/src/openai/types/fine_tuning/dpo_hyperparameters.py b/src/openai/types/fine_tuning/dpo_hyperparameters.py new file mode 100644 index 0000000000..b0b3f0581b --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_hyperparameters.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["DpoHyperparameters"] + + +class DpoHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float, None] = None + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/dpo_hyperparameters_param.py b/src/openai/types/fine_tuning/dpo_hyperparameters_param.py new file mode 100644 index 0000000000..87c6ee80a5 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_hyperparameters_param.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["DpoHyperparametersParam"] + + +class DpoHyperparametersParam(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float] + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. 
+ """ diff --git a/src/openai/types/fine_tuning/dpo_method.py b/src/openai/types/fine_tuning/dpo_method.py new file mode 100644 index 0000000000..3e20f360dd --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_method.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .dpo_hyperparameters import DpoHyperparameters + +__all__ = ["DpoMethod"] + + +class DpoMethod(BaseModel): + hyperparameters: Optional[DpoHyperparameters] = None + """The hyperparameters used for the DPO fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/dpo_method_param.py b/src/openai/types/fine_tuning/dpo_method_param.py new file mode 100644 index 0000000000..ce6b6510f6 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_method_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .dpo_hyperparameters_param import DpoHyperparametersParam + +__all__ = ["DpoMethodParam"] + + +class DpoMethodParam(TypedDict, total=False): + hyperparameters: DpoHyperparametersParam + """The hyperparameters used for the DPO fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 3897176a47..f626fbba64 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -1,11 +1,16 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional from typing_extensions import Literal from ..._models import BaseModel +from .dpo_method import DpoMethod +from ..shared.metadata import Metadata +from .supervised_method import SupervisedMethod +from .reinforcement_method import ReinforcementMethod +from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject -__all__ = ["FineTuningJob", "Error", "Hyperparameters"] +__all__ = ["FineTuningJob", "Error", "Hyperparameters", "Method"] class Error(BaseModel): @@ -15,7 +20,7 @@ class Error(BaseModel): message: str """A human-readable error message.""" - param: Optional[str] + param: Optional[str] = None """The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. @@ -23,15 +28,40 @@ class Error(BaseModel): class Hyperparameters(BaseModel): - n_epochs: Union[Literal["auto"], int] + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None """The number of epochs to train the model for. - An epoch refers to one full cycle through the training dataset. "auto" decides - the optimal number of epochs based on the size of the dataset. If setting the - number manually, we support any number between 1 and 50 epochs. + An epoch refers to one full cycle through the training dataset. """ +class Method(BaseModel): + type: Literal["supervised", "dpo", "reinforcement"] + """The type of method. 
Is either `supervised`, `dpo`, or `reinforcement`.""" + + dpo: Optional[DpoMethod] = None + """Configuration for the DPO fine-tuning method.""" + + reinforcement: Optional[ReinforcementMethod] = None + """Configuration for the reinforcement fine-tuning method.""" + + supervised: Optional[SupervisedMethod] = None + """Configuration for the supervised fine-tuning method.""" + + class FineTuningJob(BaseModel): id: str """The object identifier, which can be referenced in the API endpoints.""" @@ -39,19 +69,19 @@ class FineTuningJob(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" - error: Optional[Error] + error: Optional[Error] = None """ For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. """ - fine_tuned_model: Optional[str] + fine_tuned_model: Optional[str] = None """The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running. """ - finished_at: Optional[int] + finished_at: Optional[int] = None """The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running. @@ -60,8 +90,7 @@ class FineTuningJob(BaseModel): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) - for more details. + This value will only be returned when running `supervised` jobs. """ model: str @@ -80,13 +109,16 @@ class FineTuningJob(BaseModel): [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). """ + seed: int + """The seed used for the fine-tuning job.""" + status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] """ The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. """ - trained_tokens: Optional[int] + trained_tokens: Optional[int] = None """The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running. @@ -99,9 +131,31 @@ class FineTuningJob(BaseModel): [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). """ - validation_file: Optional[str] + validation_file: Optional[str] = None """The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). """ + + estimated_finish: Optional[int] = None + """ + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. The value will be null if the fine-tuning job is not running. + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + method: Optional[Method] = None + """The method used for fine-tuning.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 62f268868b..1d728bd765 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,5 +1,7 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import builtins +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel @@ -9,11 +11,22 @@ class FineTuningJobEvent(BaseModel): id: str + """The object identifier.""" created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" level: Literal["info", "warn", "error"] + """The log level of the event.""" message: str + """The message of the event.""" object: Literal["fine_tuning.job.event"] + """The object type, which is always "fine_tuning.job.event".""" + + data: Optional[builtins.object] = None + """The data associated with the event.""" + + type: Optional[Literal["message", "metrics"]] = None + """The type of event.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py new file mode 100644 index 0000000000..2af73fbffb --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject + +FineTuningJobIntegration = FineTuningJobWandbIntegrationObject diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py new file mode 100644 index 0000000000..4ac282eb54 --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["FineTuningJobWandbIntegration"] + + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py new file mode 100644 index 0000000000..5b94354d50 --- /dev/null +++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel +from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration + +__all__ = ["FineTuningJobWandbIntegrationObject"] + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index da750ffc19..6b2f41cb71 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -1,44 +1,83 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union, Optional +from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters"] +from .dpo_method_param import DpoMethodParam +from ..shared_params.metadata import Metadata +from .supervised_method_param import SupervisedMethodParam +from .reinforcement_method_param import ReinforcementMethodParam + +__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb", "Method"] class JobCreateParams(TypedDict, total=False): - model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]]] + model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]]] """The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). """ training_file: Required[str] """The ID of an uploaded file that contains training data. - See [upload file](https://platform.openai.com/docs/api-reference/files/upload) + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file. Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. """ hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" + """ + The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. 
+ """ + + integrations: Optional[Iterable[Integration]] + """A list of integrations to enable for your fine-tuning job.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + method: Method + """The method used for fine-tuning.""" + + seed: Optional[int] + """The seed controls the reproducibility of the job. + + Passing in the same seed and job parameters should produce the same results, but + may differ in rare cases. If a seed is not specified, one will be generated for + you. + """ suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. """ validation_file: Optional[str] @@ -76,3 +115,59 @@ class Hyperparameters(TypedDict, total=False): An epoch refers to one full cycle through the training dataset. """ + + +class IntegrationWandb(TypedDict, total=False): + project: Required[str] + """The name of the project that the new run will be created under.""" + + entity: Optional[str] + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: List[str] + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class Integration(TypedDict, total=False): + type: Required[Literal["wandb"]] + """The type of integration to enable. + + Currently, only "wandb" (Weights and Biases) is supported. + """ + + wandb: Required[IntegrationWandb] + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. + """ + + +class Method(TypedDict, total=False): + type: Required[Literal["supervised", "dpo", "reinforcement"]] + """The type of method. Is either `supervised`, `dpo`, or `reinforcement`.""" + + dpo: DpoMethodParam + """Configuration for the DPO fine-tuning method.""" + + reinforcement: ReinforcementMethodParam + """Configuration for the reinforcement fine-tuning method.""" + + supervised: SupervisedMethodParam + """Configuration for the supervised fine-tuning method.""" diff --git a/src/openai/types/fine_tuning/job_list_events_params.py b/src/openai/types/fine_tuning/job_list_events_params.py index 7be3d53315..e1c9a64dc8 100644 --- a/src/openai/types/fine_tuning/job_list_events_params.py +++ b/src/openai/types/fine_tuning/job_list_events_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. 
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations diff --git a/src/openai/types/fine_tuning/job_list_params.py b/src/openai/types/fine_tuning/job_list_params.py index 8160136901..b79f3ce86a 100644 --- a/src/openai/types/fine_tuning/job_list_params.py +++ b/src/openai/types/fine_tuning/job_list_params.py @@ -1,7 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +from typing import Dict, Optional from typing_extensions import TypedDict __all__ = ["JobListParams"] @@ -13,3 +14,10 @@ class JobListParams(TypedDict, total=False): limit: int """Number of fine-tuning jobs to retrieve.""" + + metadata: Optional[Dict[str, str]] + """Optional metadata filter. + + To filter, use the syntax `metadata[k]=v`. Alternatively, set `metadata=null` to + indicate no metadata. + """ diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..6c93da1b69 --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .checkpoint_list_params import CheckpointListParams as CheckpointListParams +from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py new file mode 100644 index 0000000000..adceb3b218 --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["CheckpointListParams"] + + +class CheckpointListParams(TypedDict, total=False): + after: str + """Identifier for the last checkpoint ID from the previous pagination request.""" + + limit: int + """Number of checkpoints to retrieve.""" diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py new file mode 100644 index 0000000000..bd07317a3e --- /dev/null +++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["FineTuningJobCheckpoint", "Metrics"] + + +class Metrics(BaseModel): + full_valid_loss: Optional[float] = None + + full_valid_mean_token_accuracy: Optional[float] = None + + step: Optional[float] = None + + train_loss: Optional[float] = None + + train_mean_token_accuracy: Optional[float] = None + + valid_loss: Optional[float] = None + + valid_mean_token_accuracy: Optional[float] = None + + +class FineTuningJobCheckpoint(BaseModel): + id: str + """The checkpoint identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the checkpoint was created.""" + + fine_tuned_model_checkpoint: str + """The name of the fine-tuned checkpoint model that is created.""" + + fine_tuning_job_id: str + """The name of the fine-tuning job that this checkpoint was created from.""" + + metrics: Metrics + """Metrics at the step number during the fine-tuning job.""" + + object: Literal["fine_tuning.job.checkpoint"] + """The object type, which is always "fine_tuning.job.checkpoint".""" + + step_number: int + """The step number that the checkpoint was created at.""" diff --git a/src/openai/types/fine_tuning/reinforcement_hyperparameters.py b/src/openai/types/fine_tuning/reinforcement_hyperparameters.py new file mode 100644 index 0000000000..7c1762d38c --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_hyperparameters.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ReinforcementHyperparameters"] + + +class ReinforcementHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + compute_multiplier: Union[Literal["auto"], float, None] = None + """ + Multiplier on amount of compute used for exploring search space during training. + """ + + eval_interval: Union[Literal["auto"], int, None] = None + """The number of training steps between evaluation runs.""" + + eval_samples: Union[Literal["auto"], int, None] = None + """Number of evaluation samples to generate per training step.""" + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + reasoning_effort: Optional[Literal["default", "low", "medium", "high"]] = None + """Level of reasoning effort.""" diff --git a/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py b/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py new file mode 100644 index 0000000000..0cc12fcb17 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
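`CheckpointListParams` and `FineTuningJobCheckpoint` back checkpoint listing for a job; a short sketch assuming the `client.fine_tuning.jobs.checkpoints.list` surface implied by the `fine_tuning/jobs` type path (job ID illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Page through the checkpoints produced by a fine-tuning job.
checkpoints = client.fine_tuning.jobs.checkpoints.list("ftjob-abc123", limit=5)
for ckpt in checkpoints.data:
    print(ckpt.step_number, ckpt.fine_tuned_model_checkpoint, ckpt.metrics.valid_loss)
```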
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["ReinforcementHyperparametersParam"] + + +class ReinforcementHyperparametersParam(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + compute_multiplier: Union[Literal["auto"], float] + """ + Multiplier on amount of compute used for exploring search space during training. + """ + + eval_interval: Union[Literal["auto"], int] + """The number of training steps between evaluation runs.""" + + eval_samples: Union[Literal["auto"], int] + """Number of evaluation samples to generate per training step.""" + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + reasoning_effort: Literal["default", "low", "medium", "high"] + """Level of reasoning effort.""" diff --git a/src/openai/types/fine_tuning/reinforcement_method.py b/src/openai/types/fine_tuning/reinforcement_method.py new file mode 100644 index 0000000000..9b65c41033 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_method.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import TypeAlias + +from ..._models import BaseModel +from ..graders.multi_grader import MultiGrader +from ..graders.python_grader import PythonGrader +from ..graders.score_model_grader import ScoreModelGrader +from ..graders.string_check_grader import StringCheckGrader +from .reinforcement_hyperparameters import ReinforcementHyperparameters +from ..graders.text_similarity_grader import TextSimilarityGrader + +__all__ = ["ReinforcementMethod", "Grader"] + +Grader: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, MultiGrader] + + +class ReinforcementMethod(BaseModel): + grader: Grader + """The grader used for the fine-tuning job.""" + + hyperparameters: Optional[ReinforcementHyperparameters] = None + """The hyperparameters used for the reinforcement fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/reinforcement_method_param.py b/src/openai/types/fine_tuning/reinforcement_method_param.py new file mode 100644 index 0000000000..00d5060536 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_method_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Required, TypeAlias, TypedDict + +from ..graders.multi_grader_param import MultiGraderParam +from ..graders.python_grader_param import PythonGraderParam +from ..graders.score_model_grader_param import ScoreModelGraderParam +from ..graders.string_check_grader_param import StringCheckGraderParam +from .reinforcement_hyperparameters_param import ReinforcementHyperparametersParam +from ..graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["ReinforcementMethodParam", "Grader"] + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] + + +class ReinforcementMethodParam(TypedDict, total=False): + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + hyperparameters: ReinforcementHyperparametersParam + """The hyperparameters used for the reinforcement fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/supervised_hyperparameters.py b/src/openai/types/fine_tuning/supervised_hyperparameters.py new file mode 100644 index 0000000000..3955ecf437 --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_hyperparameters.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["SupervisedHyperparameters"] + + +class SupervisedHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/supervised_hyperparameters_param.py b/src/openai/types/fine_tuning/supervised_hyperparameters_param.py new file mode 100644 index 0000000000..bd37d9b239 --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_hyperparameters_param.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["SupervisedHyperparametersParam"] + + +class SupervisedHyperparametersParam(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. 
+ """ diff --git a/src/openai/types/fine_tuning/supervised_method.py b/src/openai/types/fine_tuning/supervised_method.py new file mode 100644 index 0000000000..3a32bf27a0 --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_method.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .supervised_hyperparameters import SupervisedHyperparameters + +__all__ = ["SupervisedMethod"] + + +class SupervisedMethod(BaseModel): + hyperparameters: Optional[SupervisedHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/supervised_method_param.py b/src/openai/types/fine_tuning/supervised_method_param.py new file mode 100644 index 0000000000..ba277853d7 --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_method_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .supervised_hyperparameters_param import SupervisedHyperparametersParam + +__all__ = ["SupervisedMethodParam"] + + +class SupervisedMethodParam(TypedDict, total=False): + hyperparameters: SupervisedHyperparametersParam + """The hyperparameters used for the fine-tuning job.""" diff --git a/src/openai/types/graders/__init__.py b/src/openai/types/graders/__init__.py new file mode 100644 index 0000000000..e0a909125e --- /dev/null +++ b/src/openai/types/graders/__init__.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .multi_grader import MultiGrader as MultiGrader +from .python_grader import PythonGrader as PythonGrader +from .label_model_grader import LabelModelGrader as LabelModelGrader +from .multi_grader_param import MultiGraderParam as MultiGraderParam +from .score_model_grader import ScoreModelGrader as ScoreModelGrader +from .python_grader_param import PythonGraderParam as PythonGraderParam +from .string_check_grader import StringCheckGrader as StringCheckGrader +from .text_similarity_grader import TextSimilarityGrader as TextSimilarityGrader +from .label_model_grader_param import LabelModelGraderParam as LabelModelGraderParam +from .score_model_grader_param import ScoreModelGraderParam as ScoreModelGraderParam +from .string_check_grader_param import StringCheckGraderParam as StringCheckGraderParam +from .text_similarity_grader_param import TextSimilarityGraderParam as TextSimilarityGraderParam diff --git a/src/openai/types/graders/label_model_grader.py b/src/openai/types/graders/label_model_grader.py new file mode 100644 index 0000000000..d95ccc6df6 --- /dev/null +++ b/src/openai/types/graders/label_model_grader.py @@ -0,0 +1,53 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..responses.response_input_text import ResponseInputText + +__all__ = ["LabelModelGrader", "Input", "InputContent", "InputContentOutputText"] + + +class InputContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. 
Always `output_text`.""" + + +InputContent: TypeAlias = Union[str, ResponseInputText, InputContentOutputText] + + +class Input(BaseModel): + content: InputContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +class LabelModelGrader(BaseModel): + input: List[Input] + + labels: List[str] + """The labels to assign to each item in the evaluation.""" + + model: str + """The model to use for the evaluation. Must support structured outputs.""" + + name: str + """The name of the grader.""" + + passing_labels: List[str] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Literal["label_model"] + """The object type, which is always `label_model`.""" diff --git a/src/openai/types/graders/label_model_grader_param.py b/src/openai/types/graders/label_model_grader_param.py new file mode 100644 index 0000000000..76d01421ee --- /dev/null +++ b/src/openai/types/graders/label_model_grader_param.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..responses.response_input_text_param import ResponseInputTextParam + +__all__ = ["LabelModelGraderParam", "Input", "InputContent", "InputContentOutputText"] + + +class InputContentOutputText(TypedDict, total=False): + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +InputContent: TypeAlias = Union[str, ResponseInputTextParam, InputContentOutputText] + + +class Input(TypedDict, total=False): + content: Required[InputContent] + """Text inputs to the model - can contain template strings.""" + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +class LabelModelGraderParam(TypedDict, total=False): + input: Required[Iterable[Input]] + + labels: Required[List[str]] + """The labels to assign to each item in the evaluation.""" + + model: Required[str] + """The model to use for the evaluation. Must support structured outputs.""" + + name: Required[str] + """The name of the grader.""" + + passing_labels: Required[List[str]] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Required[Literal["label_model"]] + """The object type, which is always `label_model`.""" diff --git a/src/openai/types/graders/multi_grader.py b/src/openai/types/graders/multi_grader.py new file mode 100644 index 0000000000..ee9b31d2b0 --- /dev/null +++ b/src/openai/types/graders/multi_grader.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .python_grader import PythonGrader +from .label_model_grader import LabelModelGrader +from .score_model_grader import ScoreModelGrader +from .string_check_grader import StringCheckGrader +from .text_similarity_grader import TextSimilarityGrader + +__all__ = ["MultiGrader", "Graders"] + +Graders: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, LabelModelGrader] + + +class MultiGrader(BaseModel): + calculate_output: str + """A formula to calculate the output based on grader results.""" + + graders: Dict[str, Graders] + + name: str + """The name of the grader.""" + + type: Literal["multi"] + """The type of grader.""" diff --git a/src/openai/types/graders/multi_grader_param.py b/src/openai/types/graders/multi_grader_param.py new file mode 100644 index 0000000000..4dd1a48530 --- /dev/null +++ b/src/openai/types/graders/multi_grader_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .python_grader_param import PythonGraderParam +from .label_model_grader_param import LabelModelGraderParam +from .score_model_grader_param import ScoreModelGraderParam +from .string_check_grader_param import StringCheckGraderParam +from .text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["MultiGraderParam", "Graders"] + +Graders: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, LabelModelGraderParam +] + + +class MultiGraderParam(TypedDict, total=False): + calculate_output: Required[str] + """A formula to calculate the output based on grader results.""" + + graders: Required[Dict[str, Graders]] + + name: Required[str] + """The name of the grader.""" + + type: Required[Literal["multi"]] + """The type of grader.""" diff --git a/src/openai/types/graders/python_grader.py b/src/openai/types/graders/python_grader.py new file mode 100644 index 0000000000..faa10b1ef9 --- /dev/null +++ b/src/openai/types/graders/python_grader.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["PythonGrader"] + + +class PythonGrader(BaseModel): + name: str + """The name of the grader.""" + + source: str + """The source code of the python script.""" + + type: Literal["python"] + """The object type, which is always `python`.""" + + image_tag: Optional[str] = None + """The image tag to use for the python script.""" diff --git a/src/openai/types/graders/python_grader_param.py b/src/openai/types/graders/python_grader_param.py new file mode 100644 index 0000000000..efb923751e --- /dev/null +++ b/src/openai/types/graders/python_grader_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["PythonGraderParam"] + + +class PythonGraderParam(TypedDict, total=False): + name: Required[str] + """The name of the grader.""" + + source: Required[str] + """The source code of the python script.""" + + type: Required[Literal["python"]] + """The object type, which is always `python`.""" + + image_tag: str + """The image tag to use for the python script.""" diff --git a/src/openai/types/graders/score_model_grader.py b/src/openai/types/graders/score_model_grader.py new file mode 100644 index 0000000000..1349f75a58 --- /dev/null +++ b/src/openai/types/graders/score_model_grader.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..responses.response_input_text import ResponseInputText + +__all__ = ["ScoreModelGrader", "Input", "InputContent", "InputContentOutputText"] + + +class InputContentOutputText(BaseModel): + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +InputContent: TypeAlias = Union[str, ResponseInputText, InputContentOutputText] + + +class Input(BaseModel): + content: InputContent + """Text inputs to the model - can contain template strings.""" + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +class ScoreModelGrader(BaseModel): + input: List[Input] + """The input text. This may include template strings.""" + + model: str + """The model to use for the evaluation.""" + + name: str + """The name of the grader.""" + + type: Literal["score_model"] + """The object type, which is always `score_model`.""" + + range: Optional[List[float]] = None + """The range of the score. Defaults to `[0, 1]`.""" + + sampling_params: Optional[object] = None + """The sampling parameters for the model.""" diff --git a/src/openai/types/graders/score_model_grader_param.py b/src/openai/types/graders/score_model_grader_param.py new file mode 100644 index 0000000000..673f14e47d --- /dev/null +++ b/src/openai/types/graders/score_model_grader_param.py @@ -0,0 +1,55 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..responses.response_input_text_param import ResponseInputTextParam + +__all__ = ["ScoreModelGraderParam", "Input", "InputContent", "InputContentOutputText"] + + +class InputContentOutputText(TypedDict, total=False): + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +InputContent: TypeAlias = Union[str, ResponseInputTextParam, InputContentOutputText] + + +class Input(TypedDict, total=False): + content: Required[InputContent] + """Text inputs to the model - can contain template strings.""" + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. 
+ """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +class ScoreModelGraderParam(TypedDict, total=False): + input: Required[Iterable[Input]] + """The input text. This may include template strings.""" + + model: Required[str] + """The model to use for the evaluation.""" + + name: Required[str] + """The name of the grader.""" + + type: Required[Literal["score_model"]] + """The object type, which is always `score_model`.""" + + range: Iterable[float] + """The range of the score. Defaults to `[0, 1]`.""" + + sampling_params: object + """The sampling parameters for the model.""" diff --git a/src/openai/types/graders/string_check_grader.py b/src/openai/types/graders/string_check_grader.py new file mode 100644 index 0000000000..3bf0b8c868 --- /dev/null +++ b/src/openai/types/graders/string_check_grader.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["StringCheckGrader"] + + +class StringCheckGrader(BaseModel): + input: str + """The input text. This may include template strings.""" + + name: str + """The name of the grader.""" + + operation: Literal["eq", "ne", "like", "ilike"] + """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.""" + + reference: str + """The reference text. This may include template strings.""" + + type: Literal["string_check"] + """The object type, which is always `string_check`.""" diff --git a/src/openai/types/graders/string_check_grader_param.py b/src/openai/types/graders/string_check_grader_param.py new file mode 100644 index 0000000000..27b204cec0 --- /dev/null +++ b/src/openai/types/graders/string_check_grader_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["StringCheckGraderParam"] + + +class StringCheckGraderParam(TypedDict, total=False): + input: Required[str] + """The input text. This may include template strings.""" + + name: Required[str] + """The name of the grader.""" + + operation: Required[Literal["eq", "ne", "like", "ilike"]] + """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.""" + + reference: Required[str] + """The reference text. This may include template strings.""" + + type: Required[Literal["string_check"]] + """The object type, which is always `string_check`.""" diff --git a/src/openai/types/graders/text_similarity_grader.py b/src/openai/types/graders/text_similarity_grader.py new file mode 100644 index 0000000000..738d317766 --- /dev/null +++ b/src/openai/types/graders/text_similarity_grader.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TextSimilarityGrader"] + + +class TextSimilarityGrader(BaseModel): + evaluation_metric: Literal[ + "fuzzy_match", "bleu", "gleu", "meteor", "rouge_1", "rouge_2", "rouge_3", "rouge_4", "rouge_5", "rouge_l" + ] + """The evaluation metric to use. + + One of `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, + `rouge_4`, `rouge_5`, or `rouge_l`. 
+ """ + + input: str + """The text being graded.""" + + name: str + """The name of the grader.""" + + reference: str + """The text being graded against.""" + + type: Literal["text_similarity"] + """The type of grader.""" diff --git a/src/openai/types/graders/text_similarity_grader_param.py b/src/openai/types/graders/text_similarity_grader_param.py new file mode 100644 index 0000000000..db14553217 --- /dev/null +++ b/src/openai/types/graders/text_similarity_grader_param.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TextSimilarityGraderParam"] + + +class TextSimilarityGraderParam(TypedDict, total=False): + evaluation_metric: Required[ + Literal[ + "fuzzy_match", "bleu", "gleu", "meteor", "rouge_1", "rouge_2", "rouge_3", "rouge_4", "rouge_5", "rouge_l" + ] + ] + """The evaluation metric to use. + + One of `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, + `rouge_4`, `rouge_5`, or `rouge_l`. + """ + + input: Required[str] + """The text being graded.""" + + name: Required[str] + """The name of the grader.""" + + reference: Required[str] + """The text being graded against.""" + + type: Required[Literal["text_similarity"]] + """The type of grader.""" diff --git a/src/openai/types/image.py b/src/openai/types/image.py index a040caf7b6..ecaef3fd58 100644 --- a/src/openai/types/image.py +++ b/src/openai/types/image.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional @@ -9,16 +9,18 @@ class Image(BaseModel): b64_json: Optional[str] = None - """ - The base64-encoded JSON of the generated image, if `response_format` is - `b64_json`. + """The base64-encoded JSON of the generated image. + + Default value for `gpt-image-1`, and only present if `response_format` is set to + `b64_json` for `dall-e-2` and `dall-e-3`. """ revised_prompt: Optional[str] = None - """ - The prompt that was used to generate the image, if there was any revision to the - prompt. - """ + """For `dall-e-3` only, the revised prompt that was used to generate the image.""" url: Optional[str] = None - """The URL of the generated image, if `response_format` is `url` (default).""" + """ + When using `dall-e-2` or `dall-e-3`, the URL of the generated image if + `response_format` is set to `url` (default value). Unsupported for + `gpt-image-1`. + """ diff --git a/src/openai/types/image_create_variation_params.py b/src/openai/types/image_create_variation_params.py index 7b015fc176..d10b74b2c2 100644 --- a/src/openai/types/image_create_variation_params.py +++ b/src/openai/types/image_create_variation_params.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .image_model import ImageModel __all__ = ["ImageCreateVariationParams"] @@ -17,22 +18,20 @@ class ImageCreateVariationParams(TypedDict, total=False): Must be a valid PNG file, less than 4MB, and square. """ - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. Only `dall-e-2` is supported at this time. 
""" n: Optional[int] - """The number of images to generate. - - Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - """ + """The number of images to generate. Must be between 1 and 10.""" response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. - Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. """ size: Optional[Literal["256x256", "512x512", "1024x1024"]] @@ -45,5 +44,5 @@ class ImageCreateVariationParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_edit_params.py b/src/openai/types/image_edit_params.py index 043885cc38..6294e8ac19 100644 --- a/src/openai/types/image_edit_params.py +++ b/src/openai/types/image_edit_params.py @@ -1,60 +1,89 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import Union, Optional +from typing import List, Union, Optional from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .image_model import ImageModel __all__ = ["ImageEditParams"] class ImageEditParams(TypedDict, total=False): - image: Required[FileTypes] - """The image to edit. + image: Required[Union[FileTypes, List[FileTypes]]] + """The image(s) to edit. Must be a supported image file or an array of images. - Must be a valid PNG file, less than 4MB, and square. If mask is not provided, - image must have transparency, which will be used as the mask. + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 25MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. """ prompt: Required[str] """A text description of the desired image(s). - The maximum length is 1000 characters. + The maximum length is 1000 characters for `dall-e-2`, and 32000 characters for + `gpt-image-1`. + """ + + background: Optional[Literal["transparent", "opaque", "auto"]] + """Allows to set transparency for the background of the generated image(s). + + This parameter is only supported for `gpt-image-1`. Must be one of + `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + model will automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. """ mask: FileTypes """An additional image whose fully transparent areas (e.g. - where alpha is zero) indicate where `image` should be edited. Must be a valid - PNG file, less than 4MB, and have the same dimensions as `image`. + where alpha is zero) indicate where `image` should be edited. If there are + multiple images provided, the mask will be applied on the first image. Must be a + valid PNG file, less than 4MB, and have the same dimensions as `image`. """ - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. - Only `dall-e-2` is supported at this time. 
+ Only `dall-e-2` and `gpt-image-1` are supported. Defaults to `dall-e-2` unless a + parameter specific to `gpt-image-1` is used. """ n: Optional[int] """The number of images to generate. Must be between 1 and 10.""" + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] + """The quality of the image that will be generated. + + `high`, `medium` and `low` are only supported for `gpt-image-1`. `dall-e-2` only + supports `standard` quality. Defaults to `auto`. + """ + response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. - Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. This parameter is only supported for `dall-e-2`, as + `gpt-image-1` will always return base64-encoded images. """ - size: Optional[Literal["256x256", "512x512", "1024x1024"]] + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] """The size of the generated images. - Must be one of `256x256`, `512x512`, or `1024x1024`. + Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or + `auto` (default value) for `gpt-image-1`, and one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. """ user: str """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_generate_params.py b/src/openai/types/image_generate_params.py index 7eca29a7ba..8fc10220dc 100644 --- a/src/openai/types/image_generate_params.py +++ b/src/openai/types/image_generate_params.py @@ -1,10 +1,12 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Union, Optional from typing_extensions import Literal, Required, TypedDict +from .image_model import ImageModel + __all__ = ["ImageGenerateParams"] @@ -12,12 +14,33 @@ class ImageGenerateParams(TypedDict, total=False): prompt: Required[str] """A text description of the desired image(s). - The maximum length is 1000 characters for `dall-e-2` and 4000 characters for - `dall-e-3`. + The maximum length is 32000 characters for `gpt-image-1`, 1000 characters for + `dall-e-2` and 4000 characters for `dall-e-3`. + """ + + background: Optional[Literal["transparent", "opaque", "auto"]] + """Allows to set transparency for the background of the generated image(s). + + This parameter is only supported for `gpt-image-1`. Must be one of + `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + model will automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. """ - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] - """The model to use for image generation.""" + model: Union[str, ImageModel, None] + """The model to use for image generation. + + One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults to `dall-e-2` unless a + parameter specific to `gpt-image-1` is used. + """ + + moderation: Optional[Literal["low", "auto"]] + """Control the content-moderation level for images generated by `gpt-image-1`. 
+ + Must be either `low` for less restrictive filtering or `auto` (default value). + """ n: Optional[int] """The number of images to generate. @@ -25,38 +48,62 @@ class ImageGenerateParams(TypedDict, total=False): Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. """ - quality: Literal["standard", "hd"] + output_compression: Optional[int] + """The compression level (0-100%) for the generated images. + + This parameter is only supported for `gpt-image-1` with the `webp` or `jpeg` + output formats, and defaults to 100. + """ + + output_format: Optional[Literal["png", "jpeg", "webp"]] + """The format in which the generated images are returned. + + This parameter is only supported for `gpt-image-1`. Must be one of `png`, + `jpeg`, or `webp`. + """ + + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] """The quality of the image that will be generated. - `hd` creates images with finer details and greater consistency across the image. - This param is only supported for `dall-e-3`. + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. """ response_format: Optional[Literal["url", "b64_json"]] - """The format in which the generated images are returned. + """The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. - Must be one of `url` or `b64_json`. + Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the + image has been generated. This parameter isn't supported for `gpt-image-1` which + will always return base64-encoded images. """ - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] """The size of the generated images. - Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one - of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. + Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or + `auto` (default value) for `gpt-image-1`, one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792` + for `dall-e-3`. """ style: Optional[Literal["vivid", "natural"]] """The style of the generated images. - Must be one of `vivid` or `natural`. Vivid causes the model to lean towards - generating hyper-real and dramatic images. Natural causes the model to produce - more natural, less hyper-real looking images. This param is only supported for - `dall-e-3`. + This parameter is only supported for `dall-e-3`. Must be one of `vivid` or + `natural`. Vivid causes the model to lean towards generating hyper-real and + dramatic images. Natural causes the model to produce more natural, less + hyper-real looking images. """ user: str """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
""" diff --git a/src/openai/types/image_model.py b/src/openai/types/image_model.py new file mode 100644 index 0000000000..7fed69ed82 --- /dev/null +++ b/src/openai/types/image_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ImageModel"] + +ImageModel: TypeAlias = Literal["dall-e-2", "dall-e-3", "gpt-image-1"] diff --git a/src/openai/types/images_response.py b/src/openai/types/images_response.py index 9d1bc95a42..df454afa4d 100644 --- a/src/openai/types/images_response.py +++ b/src/openai/types/images_response.py @@ -1,14 +1,41 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional from .image import Image from .._models import BaseModel -__all__ = ["ImagesResponse"] +__all__ = ["ImagesResponse", "Usage", "UsageInputTokensDetails"] + + +class UsageInputTokensDetails(BaseModel): + image_tokens: int + """The number of image tokens in the input prompt.""" + + text_tokens: int + """The number of text tokens in the input prompt.""" + + +class Usage(BaseModel): + input_tokens: int + """The number of tokens (images and text) in the input prompt.""" + + input_tokens_details: UsageInputTokensDetails + """The input tokens detailed information for the image generation.""" + + output_tokens: int + """The number of image tokens in the output image.""" + + total_tokens: int + """The total number of tokens (images and text) used for the image generation.""" class ImagesResponse(BaseModel): created: int + """The Unix timestamp (in seconds) of when the image was created.""" + + data: Optional[List[Image]] = None + """The list of generated images.""" - data: List[Image] + usage: Optional[Usage] = None + """For `gpt-image-1` only, the token usage information for the image generation.""" diff --git a/src/openai/types/model.py b/src/openai/types/model.py index 58f3997f70..2631ee8d1a 100644 --- a/src/openai/types/model.py +++ b/src/openai/types/model.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing_extensions import Literal diff --git a/src/openai/types/model_deleted.py b/src/openai/types/model_deleted.py index 5329da1378..e7601f74e4 100644 --- a/src/openai/types/model_deleted.py +++ b/src/openai/types/model_deleted.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .._models import BaseModel diff --git a/src/openai/types/moderation.py b/src/openai/types/moderation.py index 3602a46985..608f562218 100644 --- a/src/openai/types/moderation.py +++ b/src/openai/types/moderation.py @@ -1,10 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional +from typing_extensions import Literal from pydantic import Field as FieldInfo from .._models import BaseModel -__all__ = ["Moderation", "Categories", "CategoryScores"] +__all__ = ["Moderation", "Categories", "CategoryAppliedInputTypes", "CategoryScores"] class Categories(BaseModel): @@ -25,7 +28,7 @@ class Categories(BaseModel): Content that expresses, incites, or promotes hate based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste. Hateful content aimed at non-protected groups (e.g., chess players) is - harrassment. + harassment. """ hate_threatening: bool = FieldInfo(alias="hate/threatening") @@ -35,6 +38,20 @@ class Categories(BaseModel): orientation, disability status, or caste. """ + illicit: Optional[bool] = None + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing, or that gives advice or instruction on how to commit + illicit acts. For example, "how to shoplift" would fit this category. + """ + + illicit_violent: Optional[bool] = FieldInfo(alias="illicit/violent", default=None) + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing that also includes violence, or that gives advice or + instruction on the procurement of any weapon. + """ + self_harm: bool = FieldInfo(alias="self-harm") """ Content that promotes, encourages, or depicts acts of self-harm, such as @@ -71,6 +88,47 @@ class Categories(BaseModel): """Content that depicts death, violence, or physical injury in graphic detail.""" +class CategoryAppliedInputTypes(BaseModel): + harassment: List[Literal["text"]] + """The applied input type(s) for the category 'harassment'.""" + + harassment_threatening: List[Literal["text"]] = FieldInfo(alias="harassment/threatening") + """The applied input type(s) for the category 'harassment/threatening'.""" + + hate: List[Literal["text"]] + """The applied input type(s) for the category 'hate'.""" + + hate_threatening: List[Literal["text"]] = FieldInfo(alias="hate/threatening") + """The applied input type(s) for the category 'hate/threatening'.""" + + illicit: List[Literal["text"]] + """The applied input type(s) for the category 'illicit'.""" + + illicit_violent: List[Literal["text"]] = FieldInfo(alias="illicit/violent") + """The applied input type(s) for the category 'illicit/violent'.""" + + self_harm: List[Literal["text", "image"]] = FieldInfo(alias="self-harm") + """The applied input type(s) for the category 'self-harm'.""" + + self_harm_instructions: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/instructions") + """The applied input type(s) for the category 'self-harm/instructions'.""" + + self_harm_intent: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/intent") + """The applied input type(s) for the category 'self-harm/intent'.""" + + sexual: List[Literal["text", "image"]] + """The applied input type(s) for the category 'sexual'.""" + + sexual_minors: List[Literal["text"]] = FieldInfo(alias="sexual/minors") + """The applied input type(s) for the category 'sexual/minors'.""" + + violence: List[Literal["text", "image"]] + """The applied input type(s) for the category 'violence'.""" + + violence_graphic: List[Literal["text", "image"]] = FieldInfo(alias="violence/graphic") + """The applied input type(s) for the category 'violence/graphic'.""" + + class CategoryScores(BaseModel): harassment: float """The score for the category 'harassment'.""" @@ -84,6 +142,12 @@ 
class CategoryScores(BaseModel): hate_threatening: float = FieldInfo(alias="hate/threatening") """The score for the category 'hate/threatening'.""" + illicit: float + """The score for the category 'illicit'.""" + + illicit_violent: float = FieldInfo(alias="illicit/violent") + """The score for the category 'illicit/violent'.""" + self_harm: float = FieldInfo(alias="self-harm") """The score for the category 'self-harm'.""" @@ -110,11 +174,13 @@ class Moderation(BaseModel): categories: Categories """A list of the categories, and whether they are flagged or not.""" + category_applied_input_types: CategoryAppliedInputTypes + """ + A list of the categories along with the input type(s) that the score applies to. + """ + category_scores: CategoryScores """A list of the categories along with their scores as predicted by model.""" flagged: bool - """ - Whether the content violates - [OpenAI's usage policies](/policies/usage-policies). - """ + """Whether any of the below categories are flagged.""" diff --git a/src/openai/types/moderation_create_params.py b/src/openai/types/moderation_create_params.py index 25ed3ce940..3ea2f3cd88 100644 --- a/src/openai/types/moderation_create_params.py +++ b/src/openai/types/moderation_create_params.py @@ -1,25 +1,29 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable +from typing_extensions import Required, TypedDict + +from .moderation_model import ModerationModel +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["ModerationCreateParams"] class ModerationCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str]]] - """The input text to classify""" + input: Required[Union[str, List[str], Iterable[ModerationMultiModalInputParam]]] + """Input (or inputs) to classify. - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + Can be a single string, an array of strings, or an array of multi-modal input + objects similar to other models. """ - Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. - - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + + model: Union[str, ModerationModel] + """The content moderation model you would like to use. + + Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). """ diff --git a/src/openai/types/moderation_create_response.py b/src/openai/types/moderation_create_response.py index 0962cdbfd9..79684f8a70 100644 --- a/src/openai/types/moderation_create_response.py +++ b/src/openai/types/moderation_create_response.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List diff --git a/src/openai/types/moderation_image_url_input_param.py b/src/openai/types/moderation_image_url_input_param.py new file mode 100644 index 0000000000..9a69a6a257 --- /dev/null +++ b/src/openai/types/moderation_image_url_input_param.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationImageURLInputParam", "ImageURL"] + + +class ImageURL(TypedDict, total=False): + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + +class ModerationImageURLInputParam(TypedDict, total=False): + image_url: Required[ImageURL] + """Contains either an image URL or a data URL for a base64 encoded image.""" + + type: Required[Literal["image_url"]] + """Always `image_url`.""" diff --git a/src/openai/types/moderation_model.py b/src/openai/types/moderation_model.py new file mode 100644 index 0000000000..64954c4547 --- /dev/null +++ b/src/openai/types/moderation_model.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ModerationModel"] + +ModerationModel: TypeAlias = Literal[ + "omni-moderation-latest", "omni-moderation-2024-09-26", "text-moderation-latest", "text-moderation-stable" +] diff --git a/src/openai/types/moderation_multi_modal_input_param.py b/src/openai/types/moderation_multi_modal_input_param.py new file mode 100644 index 0000000000..4314e7b031 --- /dev/null +++ b/src/openai/types/moderation_multi_modal_input_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .moderation_text_input_param import ModerationTextInputParam +from .moderation_image_url_input_param import ModerationImageURLInputParam + +__all__ = ["ModerationMultiModalInputParam"] + +ModerationMultiModalInputParam: TypeAlias = Union[ModerationImageURLInputParam, ModerationTextInputParam] diff --git a/src/openai/types/moderation_text_input_param.py b/src/openai/types/moderation_text_input_param.py new file mode 100644 index 0000000000..e5da53337b --- /dev/null +++ b/src/openai/types/moderation_text_input_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationTextInputParam"] + + +class ModerationTextInputParam(TypedDict, total=False): + text: Required[str] + """A string of text to classify.""" + + type: Required[Literal["text"]] + """Always `text`.""" diff --git a/src/openai/types/other_file_chunking_strategy_object.py b/src/openai/types/other_file_chunking_strategy_object.py new file mode 100644 index 0000000000..e4cd61a8fc --- /dev/null +++ b/src/openai/types/other_file_chunking_strategy_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
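
With `ModerationMultiModalInputParam` and the omni moderation models in place, a request can mix text and image inputs. A hedged sketch of what such a call might look like (the image URL is a placeholder, not a value from the diff):

# Illustrative sketch, not part of the patch: moderate mixed text and image input.
from openai import OpenAI

client = OpenAI()

result = client.moderations.create(
    model="omni-moderation-latest",
    input=[
        {"type": "text", "text": "Describe what is happening in this picture."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
    ],
)

moderation = result.results[0]
print(moderation.flagged)
# Each category also reports which input types (text and/or image) its score covers.
print(moderation.category_applied_input_types.violence)
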
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["OtherFileChunkingStrategyObject"] + + +class OtherFileChunkingStrategyObject(BaseModel): + type: Literal["other"] + """Always `other`.""" diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py new file mode 100644 index 0000000000..22fd2a0802 --- /dev/null +++ b/src/openai/types/responses/__init__.py @@ -0,0 +1,168 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .tool import Tool as Tool +from .response import Response as Response +from .tool_param import ToolParam as ToolParam +from .computer_tool import ComputerTool as ComputerTool +from .function_tool import FunctionTool as FunctionTool +from .response_item import ResponseItem as ResponseItem +from .response_error import ResponseError as ResponseError +from .response_usage import ResponseUsage as ResponseUsage +from .parsed_response import ( + ParsedContent as ParsedContent, + ParsedResponse as ParsedResponse, + ParsedResponseOutputItem as ParsedResponseOutputItem, + ParsedResponseOutputText as ParsedResponseOutputText, + ParsedResponseOutputMessage as ParsedResponseOutputMessage, + ParsedResponseFunctionToolCall as ParsedResponseFunctionToolCall, +) +from .response_status import ResponseStatus as ResponseStatus +from .web_search_tool import WebSearchTool as WebSearchTool +from .file_search_tool import FileSearchTool as FileSearchTool +from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes +from .easy_input_message import EasyInputMessage as EasyInputMessage +from .response_item_list import ResponseItemList as ResponseItemList +from .computer_tool_param import ComputerToolParam as ComputerToolParam +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .response_includable import ResponseIncludable as ResponseIncludable +from .response_input_file import ResponseInputFile as ResponseInputFile +from .response_input_text import ResponseInputText as ResponseInputText +from .tool_choice_options import ToolChoiceOptions as ToolChoiceOptions +from .response_error_event import ResponseErrorEvent as ResponseErrorEvent +from .response_input_image import ResponseInputImage as ResponseInputImage +from .response_input_param import ResponseInputParam as ResponseInputParam +from .response_output_item import ResponseOutputItem as ResponseOutputItem +from .response_output_text import ResponseOutputText as ResponseOutputText +from .response_text_config import ResponseTextConfig as ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction as ToolChoiceFunction +from .response_failed_event import ResponseFailedEvent as ResponseFailedEvent +from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent +from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam +from .input_item_list_params import InputItemListParams as InputItemListParams +from .response_create_params import ResponseCreateParams as ResponseCreateParams +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .response_input_content import ResponseInputContent as ResponseInputContent +from .response_output_message import ResponseOutputMessage as ResponseOutputMessage +from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal +from .response_reasoning_item import 
ResponseReasoningItem as ResponseReasoningItem +from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesParam +from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam +from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent +from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_incomplete_event import ResponseIncompleteEvent as ResponseIncompleteEvent +from .response_input_file_param import ResponseInputFileParam as ResponseInputFileParam +from .response_input_item_param import ResponseInputItemParam as ResponseInputItemParam +from .response_input_text_param import ResponseInputTextParam as ResponseInputTextParam +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent as ResponseInProgressEvent +from .response_input_image_param import ResponseInputImageParam as ResponseInputImageParam +from .response_output_text_param import ResponseOutputTextParam as ResponseOutputTextParam +from .response_text_config_param import ResponseTextConfigParam as ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam as ToolChoiceFunctionParam +from .response_computer_tool_call import ResponseComputerToolCall as ResponseComputerToolCall +from .response_format_text_config import ResponseFormatTextConfig as ResponseFormatTextConfig +from .response_function_tool_call import ResponseFunctionToolCall as ResponseFunctionToolCall +from .response_input_message_item import ResponseInputMessageItem as ResponseInputMessageItem +from .response_refusal_done_event import ResponseRefusalDoneEvent as ResponseRefusalDoneEvent +from .response_function_web_search import ResponseFunctionWebSearch as ResponseFunctionWebSearch +from .response_input_content_param import ResponseInputContentParam as ResponseInputContentParam +from .response_refusal_delta_event import ResponseRefusalDeltaEvent as ResponseRefusalDeltaEvent +from .response_output_message_param import ResponseOutputMessageParam as ResponseOutputMessageParam +from .response_output_refusal_param import ResponseOutputRefusalParam as ResponseOutputRefusalParam +from .response_reasoning_item_param import ResponseReasoningItemParam as ResponseReasoningItemParam +from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_function_tool_call_item import ResponseFunctionToolCallItem as ResponseFunctionToolCallItem +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .response_computer_tool_call_param import ResponseComputerToolCallParam as ResponseComputerToolCallParam +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .response_format_text_config_param import ResponseFormatTextConfigParam as ResponseFormatTextConfigParam +from .response_function_tool_call_param import 
ResponseFunctionToolCallParam as ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam as ResponseFunctionWebSearchParam +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall as ResponseCodeInterpreterToolCall +from .response_input_message_content_list import ResponseInputMessageContentList as ResponseInputMessageContentList +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam +from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent as ResponseTextAnnotationDeltaEvent +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .response_computer_tool_call_output_item import ( + ResponseComputerToolCallOutputItem as ResponseComputerToolCallOutputItem, +) +from .response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig as ResponseFormatTextJSONSchemaConfig, +) +from .response_function_tool_call_output_item import ( + ResponseFunctionToolCallOutputItem as ResponseFunctionToolCallOutputItem, +) +from .response_web_search_call_completed_event import ( + ResponseWebSearchCallCompletedEvent as ResponseWebSearchCallCompletedEvent, +) +from .response_web_search_call_searching_event import ( + ResponseWebSearchCallSearchingEvent as ResponseWebSearchCallSearchingEvent, +) +from .response_file_search_call_completed_event import ( + ResponseFileSearchCallCompletedEvent as ResponseFileSearchCallCompletedEvent, +) +from .response_file_search_call_searching_event import ( + ResponseFileSearchCallSearchingEvent as ResponseFileSearchCallSearchingEvent, +) +from .response_input_message_content_list_param import ( + ResponseInputMessageContentListParam as ResponseInputMessageContentListParam, +) +from .response_reasoning_summary_part_done_event import ( + ResponseReasoningSummaryPartDoneEvent as ResponseReasoningSummaryPartDoneEvent, +) +from .response_reasoning_summary_text_done_event import ( + ResponseReasoningSummaryTextDoneEvent as ResponseReasoningSummaryTextDoneEvent, +) +from .response_web_search_call_in_progress_event import ( + ResponseWebSearchCallInProgressEvent as ResponseWebSearchCallInProgressEvent, +) +from .response_file_search_call_in_progress_event import ( + ResponseFileSearchCallInProgressEvent as ResponseFileSearchCallInProgressEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .response_reasoning_summary_part_added_event import ( + ResponseReasoningSummaryPartAddedEvent as ResponseReasoningSummaryPartAddedEvent, +) +from .response_reasoning_summary_text_delta_event import ( + ResponseReasoningSummaryTextDeltaEvent as ResponseReasoningSummaryTextDeltaEvent, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .response_computer_tool_call_output_screenshot import ( + ResponseComputerToolCallOutputScreenshot as ResponseComputerToolCallOutputScreenshot, +) +from .response_format_text_json_schema_config_param import ( + ResponseFormatTextJSONSchemaConfigParam as ResponseFormatTextJSONSchemaConfigParam, +) +from .response_code_interpreter_call_code_done_event import ( + 
ResponseCodeInterpreterCallCodeDoneEvent as ResponseCodeInterpreterCallCodeDoneEvent, +) +from .response_code_interpreter_call_completed_event import ( + ResponseCodeInterpreterCallCompletedEvent as ResponseCodeInterpreterCallCompletedEvent, +) +from .response_code_interpreter_call_code_delta_event import ( + ResponseCodeInterpreterCallCodeDeltaEvent as ResponseCodeInterpreterCallCodeDeltaEvent, +) +from .response_code_interpreter_call_in_progress_event import ( + ResponseCodeInterpreterCallInProgressEvent as ResponseCodeInterpreterCallInProgressEvent, +) +from .response_code_interpreter_call_interpreting_event import ( + ResponseCodeInterpreterCallInterpretingEvent as ResponseCodeInterpreterCallInterpretingEvent, +) +from .response_computer_tool_call_output_screenshot_param import ( + ResponseComputerToolCallOutputScreenshotParam as ResponseComputerToolCallOutputScreenshotParam, +) diff --git a/src/openai/types/responses/computer_tool.py b/src/openai/types/responses/computer_tool.py new file mode 100644 index 0000000000..5b844f5bf4 --- /dev/null +++ b/src/openai/types/responses/computer_tool.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComputerTool"] + + +class ComputerTool(BaseModel): + display_height: int + """The height of the computer display.""" + + display_width: int + """The width of the computer display.""" + + environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] + """The type of computer environment to control.""" + + type: Literal["computer_use_preview"] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/computer_tool_param.py b/src/openai/types/responses/computer_tool_param.py new file mode 100644 index 0000000000..06a5c132ec --- /dev/null +++ b/src/openai/types/responses/computer_tool_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ComputerToolParam"] + + +class ComputerToolParam(TypedDict, total=False): + display_height: Required[int] + """The height of the computer display.""" + + display_width: Required[int] + """The width of the computer display.""" + + environment: Required[Literal["windows", "mac", "linux", "ubuntu", "browser"]] + """The type of computer environment to control.""" + + type: Required[Literal["computer_use_preview"]] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/easy_input_message.py b/src/openai/types/responses/easy_input_message.py new file mode 100644 index 0000000000..4ed0194f9f --- /dev/null +++ b/src/openai/types/responses/easy_input_message.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_input_message_content_list import ResponseInputMessageContentList + +__all__ = ["EasyInputMessage"] + + +class EasyInputMessage(BaseModel): + content: Union[str, ResponseInputMessageContentList] + """ + Text, image, or audio input to the model, used to generate a response. Can also + contain previous assistant responses. 
+ """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" diff --git a/src/openai/types/responses/easy_input_message_param.py b/src/openai/types/responses/easy_input_message_param.py new file mode 100644 index 0000000000..ef2f1c5f37 --- /dev/null +++ b/src/openai/types/responses/easy_input_message_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +from .response_input_message_content_list_param import ResponseInputMessageContentListParam + +__all__ = ["EasyInputMessageParam"] + + +class EasyInputMessageParam(TypedDict, total=False): + content: Required[Union[str, ResponseInputMessageContentListParam]] + """ + Text, image, or audio input to the model, used to generate a response. Can also + contain previous assistant responses. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" diff --git a/src/openai/types/responses/file_search_tool.py b/src/openai/types/responses/file_search_tool.py new file mode 100644 index 0000000000..dbdd8cffab --- /dev/null +++ b/src/openai/types/responses/file_search_tool.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..shared.compound_filter import CompoundFilter +from ..shared.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchTool", "Filters", "RankingOptions"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None] + + +class RankingOptions(BaseModel): + ranker: Optional[Literal["auto", "default-2024-11-15"]] = None + """The ranker to use for the file search.""" + + score_threshold: Optional[float] = None + """The score threshold for the file search, a number between 0 and 1. + + Numbers closer to 1 will attempt to return only the most relevant results, but + may return fewer results. + """ + + +class FileSearchTool(BaseModel): + type: Literal["file_search"] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: List[str] + """The IDs of the vector stores to search.""" + + filters: Optional[Filters] = None + """A filter to apply.""" + + max_num_results: Optional[int] = None + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: Optional[RankingOptions] = None + """Ranking options for search.""" diff --git a/src/openai/types/responses/file_search_tool_param.py b/src/openai/types/responses/file_search_tool_param.py new file mode 100644 index 0000000000..2851fae460 --- /dev/null +++ b/src/openai/types/responses/file_search_tool_param.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.compound_filter import CompoundFilter +from ..shared_params.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(TypedDict, total=False): + ranker: Literal["auto", "default-2024-11-15"] + """The ranker to use for the file search.""" + + score_threshold: float + """The score threshold for the file search, a number between 0 and 1. + + Numbers closer to 1 will attempt to return only the most relevant results, but + may return fewer results. + """ + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: Required[List[str]] + """The IDs of the vector stores to search.""" + + filters: Optional[Filters] + """A filter to apply.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: RankingOptions + """Ranking options for search.""" diff --git a/src/openai/types/responses/function_tool.py b/src/openai/types/responses/function_tool.py new file mode 100644 index 0000000000..d881565356 --- /dev/null +++ b/src/openai/types/responses/function_tool.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + name: str + """The name of the function to call.""" + + parameters: Optional[Dict[str, object]] = None + """A JSON schema object describing the parameters of the function.""" + + strict: Optional[bool] = None + """Whether to enforce strict parameter validation. Default `true`.""" + + type: Literal["function"] + """The type of the function tool. Always `function`.""" + + description: Optional[str] = None + """A description of the function. + + Used by the model to determine whether or not to call the function. + """ diff --git a/src/openai/types/responses/function_tool_param.py b/src/openai/types/responses/function_tool_param.py new file mode 100644 index 0000000000..56bab36f47 --- /dev/null +++ b/src/openai/types/responses/function_tool_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + parameters: Required[Optional[Dict[str, object]]] + """A JSON schema object describing the parameters of the function.""" + + strict: Required[Optional[bool]] + """Whether to enforce strict parameter validation. Default `true`.""" + + type: Required[Literal["function"]] + """The type of the function tool. Always `function`.""" + + description: Optional[str] + """A description of the function. + + Used by the model to determine whether or not to call the function. 
+ """ diff --git a/src/openai/types/responses/input_item_list_params.py b/src/openai/types/responses/input_item_list_params.py new file mode 100644 index 0000000000..6555d26788 --- /dev/null +++ b/src/openai/types/responses/input_item_list_params.py @@ -0,0 +1,37 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, TypedDict + +from .response_includable import ResponseIncludable + +__all__ = ["InputItemListParams"] + + +class InputItemListParams(TypedDict, total=False): + after: str + """An item ID to list items after, used in pagination.""" + + before: str + """An item ID to list items before, used in pagination.""" + + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for Response creation above for more information. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + """ diff --git a/src/openai/types/responses/parsed_response.py b/src/openai/types/responses/parsed_response.py new file mode 100644 index 0000000000..1263dfd648 --- /dev/null +++ b/src/openai/types/responses/parsed_response.py @@ -0,0 +1,77 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, List, Union, Generic, TypeVar, Optional +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response import Response +from ..._models import GenericModel +from ..._utils._transform import PropertyInfo +from .response_output_text import ResponseOutputText +from .response_output_message import ResponseOutputMessage +from .response_output_refusal import ResponseOutputRefusal +from .response_reasoning_item import ResponseReasoningItem +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_file_search_tool_call import ResponseFileSearchToolCall + +__all__ = ["ParsedResponse", "ParsedResponseOutputMessage", "ParsedResponseOutputText"] + +ContentType = TypeVar("ContentType") + +# we need to disable this check because we're overriding properties +# with subclasses of their types which is technically unsound as +# properties can be mutated. 
+# pyright: reportIncompatibleVariableOverride=false + + +class ParsedResponseOutputText(ResponseOutputText, GenericModel, Generic[ContentType]): + parsed: Optional[ContentType] = None + + +ParsedContent: TypeAlias = Annotated[ + Union[ParsedResponseOutputText[ContentType], ResponseOutputRefusal], + PropertyInfo(discriminator="type"), +] + + +class ParsedResponseOutputMessage(ResponseOutputMessage, GenericModel, Generic[ContentType]): + if TYPE_CHECKING: + content: List[ParsedContent[ContentType]] # type: ignore[assignment] + else: + content: List[ParsedContent] + + +class ParsedResponseFunctionToolCall(ResponseFunctionToolCall): + parsed_arguments: object = None + + +ParsedResponseOutputItem: TypeAlias = Annotated[ + Union[ + ParsedResponseOutputMessage[ContentType], + ParsedResponseFunctionToolCall, + ResponseFileSearchToolCall, + ResponseFunctionWebSearch, + ResponseComputerToolCall, + ResponseReasoningItem, + ], + PropertyInfo(discriminator="type"), +] + + +class ParsedResponse(Response, GenericModel, Generic[ContentType]): + if TYPE_CHECKING: + output: List[ParsedResponseOutputItem[ContentType]] # type: ignore[assignment] + else: + output: List[ParsedResponseOutputItem] + + @property + def output_parsed(self) -> Optional[ContentType]: + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text" and content.parsed: + return content.parsed + + return None diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py new file mode 100644 index 0000000000..254f7e204b --- /dev/null +++ b/src/openai/types/responses/response.py @@ -0,0 +1,225 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from .tool import Tool +from ..._models import BaseModel +from .response_error import ResponseError +from .response_usage import ResponseUsage +from .response_status import ResponseStatus +from ..shared.metadata import Metadata +from ..shared.reasoning import Reasoning +from .tool_choice_types import ToolChoiceTypes +from .tool_choice_options import ToolChoiceOptions +from .response_output_item import ResponseOutputItem +from .response_text_config import ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction +from ..shared.responses_model import ResponsesModel + +__all__ = ["Response", "IncompleteDetails", "ToolChoice"] + + +class IncompleteDetails(BaseModel): + reason: Optional[Literal["max_output_tokens", "content_filter"]] = None + """The reason why the response is incomplete.""" + + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction] + + +class Response(BaseModel): + id: str + """Unique identifier for this Response.""" + + created_at: float + """Unix timestamp (in seconds) of when this Response was created.""" + + error: Optional[ResponseError] = None + """An error object returned when the model fails to generate a Response.""" + + incomplete_details: Optional[IncompleteDetails] = None + """Details about why the response is incomplete.""" + + instructions: Optional[str] = None + """ + Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. 
+ """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: ResponsesModel + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + object: Literal["response"] + """The object type of this resource - always set to `response`.""" + + output: List[ResponseOutputItem] + """An array of content items generated by the model. + + - The length and order of items in the `output` array is dependent on the + model's response. + - Rather than accessing the first item in the `output` array and assuming it's + an `assistant` message with the content generated by the model, you might + consider using the `output_text` property where supported in SDKs. + """ + + parallel_tool_calls: bool + """Whether to allow the model to run tool calls in parallel.""" + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: List[Tool] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + max_output_tokens: Optional[int] = None + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + previous_response_id: Optional[str] = None + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). 
+ """ + + reasoning: Optional[Reasoning] = None + """**o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + service_tier: Optional[Literal["auto", "default", "flex"]] = None + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + + status: Optional[ResponseStatus] = None + """The status of the response generation. + + One of `completed`, `failed`, `in_progress`, or `incomplete`. + """ + + text: Optional[ResponseTextConfig] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + truncation: Optional[Literal["auto", "disabled"]] = None + """The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + usage: Optional[ResponseUsage] = None + """ + Represents token usage details including input tokens, output tokens, a + breakdown of output tokens, and the total tokens used. + """ + + user: Optional[str] = None + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + @property + def output_text(self) -> str: + """Convenience property that aggregates all `output_text` items from the `output` + list. + + If no `output_text` content blocks exist, then an empty string is returned. + """ + texts: List[str] = [] + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + texts.append(content.text) + + return "".join(texts) diff --git a/src/openai/types/responses/response_audio_delta_event.py b/src/openai/types/responses/response_audio_delta_event.py new file mode 100644 index 0000000000..f3d77fac52 --- /dev/null +++ b/src/openai/types/responses/response_audio_delta_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + delta: str + """A chunk of Base64 encoded response audio bytes.""" + + type: Literal["response.audio.delta"] + """The type of the event. Always `response.audio.delta`.""" diff --git a/src/openai/types/responses/response_audio_done_event.py b/src/openai/types/responses/response_audio_done_event.py new file mode 100644 index 0000000000..5654f8e398 --- /dev/null +++ b/src/openai/types/responses/response_audio_done_event.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + type: Literal["response.audio.done"] + """The type of the event. Always `response.audio.done`.""" diff --git a/src/openai/types/responses/response_audio_transcript_delta_event.py b/src/openai/types/responses/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..69b6660f3f --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_delta_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + delta: str + """The partial transcript of the audio response.""" + + type: Literal["response.audio.transcript.delta"] + """The type of the event. Always `response.audio.transcript.delta`.""" diff --git a/src/openai/types/responses/response_audio_transcript_done_event.py b/src/openai/types/responses/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..1a20319f83 --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_done_event.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + type: Literal["response.audio.transcript.done"] + """The type of the event. Always `response.audio.transcript.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py new file mode 100644 index 0000000000..7527238d06 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDeltaEvent"] + + +class ResponseCodeInterpreterCallCodeDeltaEvent(BaseModel): + delta: str + """The partial code snippet added by the code interpreter.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.code.delta"] + """The type of the event. 
Always `response.code_interpreter_call.code.delta`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_done_event.py b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py new file mode 100644 index 0000000000..f84d4cf3e8 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDoneEvent"] + + +class ResponseCodeInterpreterCallCodeDoneEvent(BaseModel): + code: str + """The final code snippet output by the code interpreter.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.code.done"] + """The type of the event. Always `response.code_interpreter_call.code.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_completed_event.py b/src/openai/types/responses/response_code_interpreter_call_completed_event.py new file mode 100644 index 0000000000..b0cb73fb72 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_completed_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallCompletedEvent"] + + +class ResponseCodeInterpreterCallCompletedEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.completed"] + """The type of the event. Always `response.code_interpreter_call.completed`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py new file mode 100644 index 0000000000..64b739f308 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallInProgressEvent"] + + +class ResponseCodeInterpreterCallInProgressEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.in_progress"] + """The type of the event. Always `response.code_interpreter_call.in_progress`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py new file mode 100644 index 0000000000..3100eac175 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
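The delta/done event models above are what a streaming consumer dispatches on. A rough sketch, assuming `stream=True` on `responses.create` yields these event objects (as the streaming params later in this diff suggest); the model and prompt are placeholders:

```python
from openai import OpenAI

client = OpenAI()

stream = client.responses.create(
    model="gpt-4o",
    input="Transcribe your answer and show any code you run.",
    stream=True,
)

for event in stream:
    # Each event model carries a literal `type` field to discriminate on.
    if event.type == "response.audio.transcript.delta":
        print(event.delta, end="", flush=True)
    elif event.type == "response.code_interpreter_call.code.delta":
        print(event.delta, end="", flush=True)
    elif event.type == "response.code_interpreter_call.code.done":
        print("\nfinal code:\n" + event.code)
```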
+ +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallInterpretingEvent"] + + +class ResponseCodeInterpreterCallInterpretingEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.interpreting"] + """The type of the event. Always `response.code_interpreter_call.interpreting`.""" diff --git a/src/openai/types/responses/response_code_interpreter_tool_call.py b/src/openai/types/responses/response_code_interpreter_tool_call.py new file mode 100644 index 0000000000..d5a5057074 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_tool_call.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterToolCall", "Result", "ResultLogs", "ResultFiles", "ResultFilesFile"] + + +class ResultLogs(BaseModel): + logs: str + """The logs of the code interpreter tool call.""" + + type: Literal["logs"] + """The type of the code interpreter text output. Always `logs`.""" + + +class ResultFilesFile(BaseModel): + file_id: str + """The ID of the file.""" + + mime_type: str + """The MIME type of the file.""" + + +class ResultFiles(BaseModel): + files: List[ResultFilesFile] + + type: Literal["files"] + """The type of the code interpreter file output. Always `files`.""" + + +Result: TypeAlias = Annotated[Union[ResultLogs, ResultFiles], PropertyInfo(discriminator="type")] + + +class ResponseCodeInterpreterToolCall(BaseModel): + id: str + """The unique ID of the code interpreter tool call.""" + + code: str + """The code to run.""" + + results: List[Result] + """The results of the code interpreter tool call.""" + + status: Literal["in_progress", "interpreting", "completed"] + """The status of the code interpreter tool call.""" + + type: Literal["code_interpreter_call"] + """The type of the code interpreter tool call. Always `code_interpreter_call`.""" diff --git a/src/openai/types/responses/response_completed_event.py b/src/openai/types/responses/response_completed_event.py new file mode 100644 index 0000000000..a944f248ef --- /dev/null +++ b/src/openai/types/responses/response_completed_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCompletedEvent"] + + +class ResponseCompletedEvent(BaseModel): + response: Response + """Properties of the completed response.""" + + type: Literal["response.completed"] + """The type of the event. Always `response.completed`.""" diff --git a/src/openai/types/responses/response_computer_tool_call.py b/src/openai/types/responses/response_computer_tool_call.py new file mode 100644 index 0000000000..994837567a --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
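`ResponseCodeInterpreterToolCall.results` is a discriminated union (`logs` vs. `files`), so consumers can branch on `result.type`. A small sketch that only reads fields defined above:

```python
from openai.types.responses.response_code_interpreter_tool_call import (
    ResponseCodeInterpreterToolCall,
)


def summarize_call(call: ResponseCodeInterpreterToolCall) -> None:
    # `status` is one of "in_progress", "interpreting", or "completed".
    print(f"code interpreter call {call.id}: {call.status}")
    for result in call.results:
        if result.type == "logs":
            print("logs:", result.logs)
        elif result.type == "files":
            for file in result.files:
                print("file:", file.file_id, f"({file.mime_type})")
```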
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseComputerToolCall", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(BaseModel): + button: Literal["left", "right", "wheel", "back", "forward"] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Literal["click"] + """Specifies the event type. + + For a click action, this property is always set to `click`. + """ + + x: int + """The x-coordinate where the click occurred.""" + + y: int + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(BaseModel): + type: Literal["double_click"] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. + """ + + x: int + """The x-coordinate where the double click occurred.""" + + y: int + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(BaseModel): + x: int + """The x-coordinate.""" + + y: int + """The y-coordinate.""" + + +class ActionDrag(BaseModel): + path: List[ActionDragPath] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Literal["drag"] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(BaseModel): + keys: List[str] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Literal["keypress"] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(BaseModel): + type: Literal["move"] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: int + """The x-coordinate to move to.""" + + y: int + """The y-coordinate to move to.""" + + +class ActionScreenshot(BaseModel): + type: Literal["screenshot"] + """Specifies the event type. + + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(BaseModel): + scroll_x: int + """The horizontal scroll distance.""" + + scroll_y: int + """The vertical scroll distance.""" + + type: Literal["scroll"] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. + """ + + x: int + """The x-coordinate where the scroll occurred.""" + + y: int + """The y-coordinate where the scroll occurred.""" + + +class ActionType(BaseModel): + text: str + """The text to type.""" + + type: Literal["type"] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(BaseModel): + type: Literal["wait"] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. 
+ """ + + +Action: TypeAlias = Annotated[ + Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, + ], + PropertyInfo(discriminator="type"), +] + + +class PendingSafetyCheck(BaseModel): + id: str + """The ID of the pending safety check.""" + + code: str + """The type of the pending safety check.""" + + message: str + """Details about the pending safety check.""" + + +class ResponseComputerToolCall(BaseModel): + id: str + """The unique ID of the computer call.""" + + action: Action + """A click action.""" + + call_id: str + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: List[PendingSafetyCheck] + """The pending safety checks for the computer call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["computer_call"] + """The type of the computer call. Always `computer_call`.""" diff --git a/src/openai/types/responses/response_computer_tool_call_output_item.py b/src/openai/types/responses/response_computer_tool_call_output_item.py new file mode 100644 index 0000000000..a2dd68f579 --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_item.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_computer_tool_call_output_screenshot import ResponseComputerToolCallOutputScreenshot + +__all__ = ["ResponseComputerToolCallOutputItem", "AcknowledgedSafetyCheck"] + + +class AcknowledgedSafetyCheck(BaseModel): + id: str + """The ID of the pending safety check.""" + + code: str + """The type of the pending safety check.""" + + message: str + """Details about the pending safety check.""" + + +class ResponseComputerToolCallOutputItem(BaseModel): + id: str + """The unique ID of the computer call tool output.""" + + call_id: str + """The ID of the computer tool call that produced the output.""" + + output: ResponseComputerToolCallOutputScreenshot + """A computer screenshot image used with the computer use tool.""" + + type: Literal["computer_call_output"] + """The type of the computer tool call output. Always `computer_call_output`.""" + + acknowledged_safety_checks: Optional[List[AcknowledgedSafetyCheck]] = None + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ diff --git a/src/openai/types/responses/response_computer_tool_call_output_screenshot.py b/src/openai/types/responses/response_computer_tool_call_output_screenshot.py new file mode 100644 index 0000000000..a500da85c1 --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_screenshot.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
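`ResponseComputerToolCall.action` is likewise a `type`-discriminated union, so an executor can switch on the action kind. A hedged sketch that only reads the fields declared above; how the coordinates map onto a real environment is left to the caller:

```python
from openai.types.responses.response_computer_tool_call import ResponseComputerToolCall


def describe_action(call: ResponseComputerToolCall) -> str:
    action = call.action
    if action.type == "click":
        return f"{action.button} click at ({action.x}, {action.y})"
    if action.type == "drag":
        path = " -> ".join(f"({p.x}, {p.y})" for p in action.path)
        return f"drag along {path}"
    if action.type == "keypress":
        return "press " + "+".join(action.keys)
    if action.type == "type":
        return f"type {action.text!r}"
    # screenshot, scroll, move, double_click, wait
    return action.type
```

Any entries in `pending_safety_checks` are expected to be surfaced to the developer and acknowledged before the corresponding `computer_call_output` is sent back.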
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseComputerToolCallOutputScreenshot"] + + +class ResponseComputerToolCallOutputScreenshot(BaseModel): + type: Literal["computer_screenshot"] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. + """ + + file_id: Optional[str] = None + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: Optional[str] = None + """The URL of the screenshot image.""" diff --git a/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py b/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py new file mode 100644 index 0000000000..efc2028aa4 --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseComputerToolCallOutputScreenshotParam"] + + +class ResponseComputerToolCallOutputScreenshotParam(TypedDict, total=False): + type: Required[Literal["computer_screenshot"]] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. + """ + + file_id: str + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: str + """The URL of the screenshot image.""" diff --git a/src/openai/types/responses/response_computer_tool_call_param.py b/src/openai/types/responses/response_computer_tool_call_param.py new file mode 100644 index 0000000000..d4ef56ab5c --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_param.py @@ -0,0 +1,208 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseComputerToolCallParam", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(TypedDict, total=False): + button: Required[Literal["left", "right", "wheel", "back", "forward"]] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Required[Literal["click"]] + """Specifies the event type. + + For a click action, this property is always set to `click`. + """ + + x: Required[int] + """The x-coordinate where the click occurred.""" + + y: Required[int] + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(TypedDict, total=False): + type: Required[Literal["double_click"]] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. 
+ """ + + x: Required[int] + """The x-coordinate where the double click occurred.""" + + y: Required[int] + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(TypedDict, total=False): + x: Required[int] + """The x-coordinate.""" + + y: Required[int] + """The y-coordinate.""" + + +class ActionDrag(TypedDict, total=False): + path: Required[Iterable[ActionDragPath]] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Required[Literal["drag"]] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(TypedDict, total=False): + keys: Required[List[str]] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Required[Literal["keypress"]] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(TypedDict, total=False): + type: Required[Literal["move"]] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: Required[int] + """The x-coordinate to move to.""" + + y: Required[int] + """The y-coordinate to move to.""" + + +class ActionScreenshot(TypedDict, total=False): + type: Required[Literal["screenshot"]] + """Specifies the event type. + + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(TypedDict, total=False): + scroll_x: Required[int] + """The horizontal scroll distance.""" + + scroll_y: Required[int] + """The vertical scroll distance.""" + + type: Required[Literal["scroll"]] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. + """ + + x: Required[int] + """The x-coordinate where the scroll occurred.""" + + y: Required[int] + """The y-coordinate where the scroll occurred.""" + + +class ActionType(TypedDict, total=False): + text: Required[str] + """The text to type.""" + + type: Required[Literal["type"]] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(TypedDict, total=False): + type: Required[Literal["wait"]] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. + """ + + +Action: TypeAlias = Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, +] + + +class PendingSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Required[str] + """The type of the pending safety check.""" + + message: Required[str] + """Details about the pending safety check.""" + + +class ResponseComputerToolCallParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the computer call.""" + + action: Required[Action] + """A click action.""" + + call_id: Required[str] + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: Required[Iterable[PendingSafetyCheck]] + """The pending safety checks for the computer call.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. 
+ """ + + type: Required[Literal["computer_call"]] + """The type of the computer call. Always `computer_call`.""" diff --git a/src/openai/types/responses/response_content_part_added_event.py b/src/openai/types/responses/response_content_part_added_event.py new file mode 100644 index 0000000000..93f5ec4b0c --- /dev/null +++ b/src/openai/types/responses/response_content_part_added_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + +Part: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseContentPartAddedEvent(BaseModel): + content_index: int + """The index of the content part that was added.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that was added.""" + + type: Literal["response.content_part.added"] + """The type of the event. Always `response.content_part.added`.""" diff --git a/src/openai/types/responses/response_content_part_done_event.py b/src/openai/types/responses/response_content_part_done_event.py new file mode 100644 index 0000000000..4ec0739877 --- /dev/null +++ b/src/openai/types/responses/response_content_part_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + +Part: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseContentPartDoneEvent(BaseModel): + content_index: int + """The index of the content part that is done.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that is done.""" + + type: Literal["response.content_part.done"] + """The type of the event. Always `response.content_part.done`.""" diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py new file mode 100644 index 0000000000..972d413926 --- /dev/null +++ b/src/openai/types/responses/response_create_params.py @@ -0,0 +1,230 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
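The content-part events carry a `Part` union of output text or a refusal. A sketch of a handler, assuming the `text` and `refusal` fields on `ResponseOutputText` / `ResponseOutputRefusal` defined elsewhere in this change:

```python
from openai.types.responses.response_content_part_done_event import (
    ResponseContentPartDoneEvent,
)


def handle_part_done(event: ResponseContentPartDoneEvent) -> None:
    # `part` is discriminated by its `type` literal.
    if event.part.type == "output_text":
        print(f"item {event.item_id} [{event.content_index}]: {event.part.text}")
    elif event.part.type == "refusal":
        print(f"item {event.item_id} refused: {event.part.refusal}")
```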
+ +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .tool_param import ToolParam +from .response_includable import ResponseIncludable +from .tool_choice_options import ToolChoiceOptions +from .response_input_param import ResponseInputParam +from ..shared_params.metadata import Metadata +from .tool_choice_types_param import ToolChoiceTypesParam +from ..shared_params.reasoning import Reasoning +from .response_text_config_param import ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam +from ..shared_params.responses_model import ResponsesModel + +__all__ = [ + "ResponseCreateParamsBase", + "ToolChoice", + "ResponseCreateParamsNonStreaming", + "ResponseCreateParamsStreaming", +] + + +class ResponseCreateParamsBase(TypedDict, total=False): + input: Required[Union[str, ResponseInputParam]] + """Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + """ + + model: Required[ResponsesModel] + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + include: Optional[List[ResponseIncludable]] + """Specify additional output data to include in the model response. + + Currently supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + """ + + instructions: Optional[str] + """ + Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + """ + + max_output_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + parallel_tool_calls: Optional[bool] + """Whether to allow the model to run tool calls in parallel.""" + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + """ + + reasoning: Optional[Reasoning] + """**o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + service_tier: Optional[Literal["auto", "default", "flex"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - If set to 'flex', the request will be processed with the Flex Processing + service tier. + [Learn more](https://platform.openai.com/docs/guides/flex-processing). + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + + store: Optional[bool] + """Whether to store the generated model response for later retrieval via API.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + text: ResponseTextConfigParam + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: Iterable[ToolParam] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. 
+ + We generally recommend altering this or `temperature` but not both. + """ + + truncation: Optional[Literal["auto", "disabled"]] + """The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam] + + +class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +class ResponseCreateParamsStreaming(ResponseCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/openai/types/responses/response_created_event.py b/src/openai/types/responses/response_created_event.py new file mode 100644 index 0000000000..7a524cec87 --- /dev/null +++ b/src/openai/types/responses/response_created_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + response: Response + """The response that was created.""" + + type: Literal["response.created"] + """The type of the event. Always `response.created`.""" diff --git a/src/openai/types/responses/response_error.py b/src/openai/types/responses/response_error.py new file mode 100644 index 0000000000..90f1fcf5da --- /dev/null +++ b/src/openai/types/responses/response_error.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
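`ResponseCreateParamsNonStreaming` and `ResponseCreateParamsStreaming` map one-to-one onto keyword arguments of `client.responses.create`. A sketch of both shapes (every value below is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

# Non-streaming (stream omitted or False): returns a Response object.
response = client.responses.create(
    model="gpt-4o",
    input="Summarize the plot of Hamlet in two sentences.",
    instructions="Answer in plain, friendly English.",
    max_output_tokens=200,
    metadata={"ticket": "demo-123"},
    temperature=0.2,
    truncation="auto",
    user="user-abc-123",
)
print(response.output_text)

# Streaming (stream=True is required): yields the event models defined in this PR.
for event in client.responses.create(
    model="gpt-4o",
    input="Now do it as a haiku.",
    stream=True,
):
    if event.type == "response.completed":
        print(event.response.output_text)
```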
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseError"] + + +class ResponseError(BaseModel): + code: Literal[ + "server_error", + "rate_limit_exceeded", + "invalid_prompt", + "vector_store_timeout", + "invalid_image", + "invalid_image_format", + "invalid_base64_image", + "invalid_image_url", + "image_too_large", + "image_too_small", + "image_parse_error", + "image_content_policy_violation", + "invalid_image_mode", + "image_file_too_large", + "unsupported_image_media_type", + "empty_image_file", + "failed_to_download_image", + "image_file_not_found", + ] + """The error code for the response.""" + + message: str + """A human-readable description of the error.""" diff --git a/src/openai/types/responses/response_error_event.py b/src/openai/types/responses/response_error_event.py new file mode 100644 index 0000000000..1b7e605d02 --- /dev/null +++ b/src/openai/types/responses/response_error_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseErrorEvent"] + + +class ResponseErrorEvent(BaseModel): + code: Optional[str] = None + """The error code.""" + + message: str + """The error message.""" + + param: Optional[str] = None + """The error parameter.""" + + type: Literal["error"] + """The type of the event. Always `error`.""" diff --git a/src/openai/types/responses/response_failed_event.py b/src/openai/types/responses/response_failed_event.py new file mode 100644 index 0000000000..3e8f75d8c4 --- /dev/null +++ b/src/openai/types/responses/response_failed_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseFailedEvent"] + + +class ResponseFailedEvent(BaseModel): + response: Response + """The response that failed.""" + + type: Literal["response.failed"] + """The type of the event. Always `response.failed`.""" diff --git a/src/openai/types/responses/response_file_search_call_completed_event.py b/src/openai/types/responses/response_file_search_call_completed_event.py new file mode 100644 index 0000000000..4b86083369 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_completed_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallCompletedEvent"] + + +class ResponseFileSearchCallCompletedEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + type: Literal["response.file_search_call.completed"] + """The type of the event. Always `response.file_search_call.completed`.""" diff --git a/src/openai/types/responses/response_file_search_call_in_progress_event.py b/src/openai/types/responses/response_file_search_call_in_progress_event.py new file mode 100644 index 0000000000..eb42e3dad6 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_in_progress_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
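`ResponseErrorEvent` (a streaming `error` event) and `ResponseFailedEvent` surface failures in two different ways. A loose sketch of telling them apart, assuming the `error: Optional[ResponseError]` field declared on `Response` earlier in this diff:

```python
def handle_terminal_event(event) -> None:
    if event.type == "error":
        # ResponseErrorEvent: `code` and `param` may be None.
        print(f"stream error ({event.code or 'unknown'}): {event.message}")
    elif event.type == "response.failed":
        # ResponseFailedEvent wraps the full Response; assumption: its `error`
        # field holds the ResponseError defined above.
        err = event.response.error
        if err is not None:
            print(f"response failed ({err.code}): {err.message}")
```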
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallInProgressEvent"] + + +class ResponseFileSearchCallInProgressEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + type: Literal["response.file_search_call.in_progress"] + """The type of the event. Always `response.file_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_file_search_call_searching_event.py b/src/openai/types/responses/response_file_search_call_searching_event.py new file mode 100644 index 0000000000..3cd8905de6 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_searching_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallSearchingEvent"] + + +class ResponseFileSearchCallSearchingEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is searching.""" + + type: Literal["response.file_search_call.searching"] + """The type of the event. Always `response.file_search_call.searching`.""" diff --git a/src/openai/types/responses/response_file_search_tool_call.py b/src/openai/types/responses/response_file_search_tool_call.py new file mode 100644 index 0000000000..ef1c6a5608 --- /dev/null +++ b/src/openai/types/responses/response_file_search_tool_call.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchToolCall", "Result"] + + +class Result(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + file_id: Optional[str] = None + """The unique ID of the file.""" + + filename: Optional[str] = None + """The name of the file.""" + + score: Optional[float] = None + """The relevance score of the file - a value between 0 and 1.""" + + text: Optional[str] = None + """The text that was retrieved from the file.""" + + +class ResponseFileSearchToolCall(BaseModel): + id: str + """The unique ID of the file search tool call.""" + + queries: List[str] + """The queries used to search for files.""" + + status: Literal["in_progress", "searching", "completed", "incomplete", "failed"] + """The status of the file search tool call. + + One of `in_progress`, `searching`, `incomplete` or `failed`, + """ + + type: Literal["file_search_call"] + """The type of the file search tool call. 
Always `file_search_call`.""" + + results: Optional[List[Result]] = None + """The results of the file search tool call.""" diff --git a/src/openai/types/responses/response_file_search_tool_call_param.py b/src/openai/types/responses/response_file_search_tool_call_param.py new file mode 100644 index 0000000000..9a4177cf81 --- /dev/null +++ b/src/openai/types/responses/response_file_search_tool_call_param.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFileSearchToolCallParam", "Result"] + + +class Result(TypedDict, total=False): + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + file_id: str + """The unique ID of the file.""" + + filename: str + """The name of the file.""" + + score: float + """The relevance score of the file - a value between 0 and 1.""" + + text: str + """The text that was retrieved from the file.""" + + +class ResponseFileSearchToolCallParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the file search tool call.""" + + queries: Required[List[str]] + """The queries used to search for files.""" + + status: Required[Literal["in_progress", "searching", "completed", "incomplete", "failed"]] + """The status of the file search tool call. + + One of `in_progress`, `searching`, `incomplete` or `failed`, + """ + + type: Required[Literal["file_search_call"]] + """The type of the file search tool call. Always `file_search_call`.""" + + results: Optional[Iterable[Result]] + """The results of the file search tool call.""" diff --git a/src/openai/types/responses/response_format_text_config.py b/src/openai/types/responses/response_format_text_config.py new file mode 100644 index 0000000000..a4896bf9fe --- /dev/null +++ b/src/openai/types/responses/response_format_text_config.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config import ResponseFormatTextJSONSchemaConfig + +__all__ = ["ResponseFormatTextConfig"] + +ResponseFormatTextConfig: TypeAlias = Annotated[ + Union[ResponseFormatText, ResponseFormatTextJSONSchemaConfig, ResponseFormatJSONObject], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_format_text_config_param.py b/src/openai/types/responses/response_format_text_config_param.py new file mode 100644 index 0000000000..fcaf8f3fb6 --- /dev/null +++ b/src/openai/types/responses/response_format_text_config_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
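`ResponseFileSearchToolCall.results` is optional and only populated when the request asks for it via `include=["file_search_call.results"]` (see `ResponseIncludable`). A small sketch of reading the hits:

```python
from openai.types.responses.response_file_search_tool_call import (
    ResponseFileSearchToolCall,
)


def print_file_search_hits(call: ResponseFileSearchToolCall) -> None:
    print("queries:", ", ".join(call.queries))
    for result in call.results or []:
        score = f"{result.score:.2f}" if result.score is not None else "n/a"
        print(f"- {result.filename or result.file_id} (score {score})")
        if result.text:
            print(f"  {result.text[:120]}")
```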
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config_param import ResponseFormatTextJSONSchemaConfigParam + +__all__ = ["ResponseFormatTextConfigParam"] + +ResponseFormatTextConfigParam: TypeAlias = Union[ + ResponseFormatText, ResponseFormatTextJSONSchemaConfigParam, ResponseFormatJSONObject +] diff --git a/src/openai/types/responses/response_format_text_json_schema_config.py b/src/openai/types/responses/response_format_text_json_schema_config.py new file mode 100644 index 0000000000..001fcf5bab --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatTextJSONSchemaConfig"] + + +class ResponseFormatTextJSONSchemaConfig(BaseModel): + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_format_text_json_schema_config_param.py b/src/openai/types/responses/response_format_text_json_schema_config_param.py new file mode 100644 index 0000000000..f293a80c5a --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config_param.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatTextJSONSchemaConfigParam"] + + +class ResponseFormatTextJSONSchemaConfigParam(TypedDict, total=False): + name: Required[str] + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + schema: Required[Dict[str, object]] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Required[Literal["json_schema"]] + """The type of response format being defined. Always `json_schema`.""" + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. 
+ """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_function_call_arguments_delta_event.py b/src/openai/types/responses/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..0989b7caeb --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_delta_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + delta: str + """The function-call arguments delta that is added.""" + + item_id: str + """The ID of the output item that the function-call arguments delta is added to.""" + + output_index: int + """ + The index of the output item that the function-call arguments delta is added to. + """ + + type: Literal["response.function_call_arguments.delta"] + """The type of the event. Always `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/responses/response_function_call_arguments_done_event.py b/src/openai/types/responses/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..1d805a57c6 --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_done_event.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + arguments: str + """The function-call arguments.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item.""" + + type: Literal["response.function_call_arguments.done"] diff --git a/src/openai/types/responses/response_function_tool_call.py b/src/openai/types/responses/response_function_tool_call.py new file mode 100644 index 0000000000..2a8482204e --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionToolCall"] + + +class ResponseFunctionToolCall(BaseModel): + arguments: str + """A JSON string of the arguments to pass to the function.""" + + call_id: str + """The unique ID of the function tool call generated by the model.""" + + name: str + """The name of the function to run.""" + + type: Literal["function_call"] + """The type of the function tool call. Always `function_call`.""" + + id: Optional[str] = None + """The unique ID of the function tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. 
+ """ diff --git a/src/openai/types/responses/response_function_tool_call_item.py b/src/openai/types/responses/response_function_tool_call_item.py new file mode 100644 index 0000000000..762015a4b1 --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call_item.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .response_function_tool_call import ResponseFunctionToolCall + +__all__ = ["ResponseFunctionToolCallItem"] + + +class ResponseFunctionToolCallItem(ResponseFunctionToolCall): + id: str # type: ignore + """The unique ID of the function tool call.""" diff --git a/src/openai/types/responses/response_function_tool_call_output_item.py b/src/openai/types/responses/response_function_tool_call_output_item.py new file mode 100644 index 0000000000..4c8c41a6fe --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call_output_item.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionToolCallOutputItem"] + + +class ResponseFunctionToolCallOutputItem(BaseModel): + id: str + """The unique ID of the function call tool output.""" + + call_id: str + """The unique ID of the function tool call generated by the model.""" + + output: str + """A JSON string of the output of the function tool call.""" + + type: Literal["function_call_output"] + """The type of the function tool call output. Always `function_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_function_tool_call_param.py b/src/openai/types/responses/response_function_tool_call_param.py new file mode 100644 index 0000000000..eaa263cf67 --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFunctionToolCallParam"] + + +class ResponseFunctionToolCallParam(TypedDict, total=False): + arguments: Required[str] + """A JSON string of the arguments to pass to the function.""" + + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + name: Required[str] + """The name of the function to run.""" + + type: Required[Literal["function_call"]] + """The type of the function tool call. Always `function_call`.""" + + id: str + """The unique ID of the function tool call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_function_web_search.py b/src/openai/types/responses/response_function_web_search.py new file mode 100644 index 0000000000..44734b681f --- /dev/null +++ b/src/openai/types/responses/response_function_web_search.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
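`ResponseFunctionToolCall` and `ResponseFunctionToolCallOutputItem` form the two halves of a function-calling round trip: the model emits a `function_call` item, and the caller replies with a `function_call_output` input item. A sketch of the reply side; the dict shape of the output item, the `get_weather` tool name, and the `previous_response_id` chaining are assumptions based on the models and params in this diff:

```python
import json

from openai import OpenAI

client = OpenAI()


def answer_tool_calls(response) -> None:
    outputs = []
    for item in response.output:
        if item.type == "function_call" and item.name == "get_weather":
            args = json.loads(item.arguments)  # arguments arrive as a JSON string
            outputs.append(
                {
                    # Assumed input-item shape mirroring ResponseFunctionToolCallOutputItem.
                    "type": "function_call_output",
                    "call_id": item.call_id,
                    "output": json.dumps({"city": args.get("city"), "temp_c": 18}),
                }
            )
    if outputs:
        followup = client.responses.create(
            model="gpt-4o",
            previous_response_id=response.id,
            input=outputs,
        )
        print(followup.output_text)
```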
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionWebSearch"] + + +class ResponseFunctionWebSearch(BaseModel): + id: str + """The unique ID of the web search tool call.""" + + status: Literal["in_progress", "searching", "completed", "failed"] + """The status of the web search tool call.""" + + type: Literal["web_search_call"] + """The type of the web search tool call. Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_function_web_search_param.py b/src/openai/types/responses/response_function_web_search_param.py new file mode 100644 index 0000000000..d413e60b12 --- /dev/null +++ b/src/openai/types/responses/response_function_web_search_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFunctionWebSearchParam"] + + +class ResponseFunctionWebSearchParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the web search tool call.""" + + status: Required[Literal["in_progress", "searching", "completed", "failed"]] + """The status of the web search tool call.""" + + type: Required[Literal["web_search_call"]] + """The type of the web search tool call. Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_in_progress_event.py b/src/openai/types/responses/response_in_progress_event.py new file mode 100644 index 0000000000..7d96cbb8ad --- /dev/null +++ b/src/openai/types/responses/response_in_progress_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseInProgressEvent"] + + +class ResponseInProgressEvent(BaseModel): + response: Response + """The response that is in progress.""" + + type: Literal["response.in_progress"] + """The type of the event. Always `response.in_progress`.""" diff --git a/src/openai/types/responses/response_includable.py b/src/openai/types/responses/response_includable.py new file mode 100644 index 0000000000..a01dddd71d --- /dev/null +++ b/src/openai/types/responses/response_includable.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseIncludable"] + +ResponseIncludable: TypeAlias = Literal[ + "file_search_call.results", + "message.input_image.image_url", + "computer_call_output.output.image_url", + "reasoning.encrypted_content", +] diff --git a/src/openai/types/responses/response_incomplete_event.py b/src/openai/types/responses/response_incomplete_event.py new file mode 100644 index 0000000000..742b789c7e --- /dev/null +++ b/src/openai/types/responses/response_incomplete_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseIncompleteEvent"] + + +class ResponseIncompleteEvent(BaseModel): + response: Response + """The response that was incomplete.""" + + type: Literal["response.incomplete"] + """The type of the event. 
Always `response.incomplete`.""" diff --git a/src/openai/types/responses/response_input_content.py b/src/openai/types/responses/response_input_content.py new file mode 100644 index 0000000000..1726909a17 --- /dev/null +++ b/src/openai/types/responses/response_input_content.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_input_file import ResponseInputFile +from .response_input_text import ResponseInputText +from .response_input_image import ResponseInputImage + +__all__ = ["ResponseInputContent"] + +ResponseInputContent: TypeAlias = Annotated[ + Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/responses/response_input_content_param.py b/src/openai/types/responses/response_input_content_param.py new file mode 100644 index 0000000000..7791cdfd8e --- /dev/null +++ b/src/openai/types/responses/response_input_content_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] diff --git a/src/openai/types/responses/response_input_file.py b/src/openai/types/responses/response_input_file.py new file mode 100644 index 0000000000..00b35dc844 --- /dev/null +++ b/src/openai/types/responses/response_input_file.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputFile"] + + +class ResponseInputFile(BaseModel): + type: Literal["input_file"] + """The type of the input item. Always `input_file`.""" + + file_data: Optional[str] = None + """The content of the file to be sent to the model.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + filename: Optional[str] = None + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_file_param.py b/src/openai/types/responses/response_input_file_param.py new file mode 100644 index 0000000000..61ae46f0cb --- /dev/null +++ b/src/openai/types/responses/response_input_file_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputFileParam"] + + +class ResponseInputFileParam(TypedDict, total=False): + type: Required[Literal["input_file"]] + """The type of the input item. 
Always `input_file`.""" + + file_data: str + """The content of the file to be sent to the model.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + filename: str + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_image.py b/src/openai/types/responses/response_input_image.py new file mode 100644 index 0000000000..f2d760b25e --- /dev/null +++ b/src/openai/types/responses/response_input_image.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputImage"] + + +class ResponseInputImage(BaseModel): + detail: Literal["low", "high", "auto"] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Literal["input_image"] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] = None + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_image_param.py b/src/openai/types/responses/response_input_image_param.py new file mode 100644 index 0000000000..bc17e4f1c2 --- /dev/null +++ b/src/openai/types/responses/response_input_image_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputImageParam"] + + +class ResponseInputImageParam(TypedDict, total=False): + detail: Required[Literal["low", "high", "auto"]] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Required[Literal["input_image"]] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py new file mode 100644 index 0000000000..290953a0ef --- /dev/null +++ b/src/openai/types/responses/response_input_item_param.py @@ -0,0 +1,131 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
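
The input content *Param shapes (text, image, and file parts) can be combined into one user message; a small illustrative sketch with placeholder IDs and a truncated data URL (the input_text shape is defined a little further down in this diff):

from openai.types.responses.response_input_image_param import ResponseInputImageParam

image: ResponseInputImageParam = {
    "type": "input_image",
    "detail": "auto",                          # required by the TypedDict above
    "image_url": "data:image/png;base64,...",  # placeholder data URL
}

user_message = {
    "role": "user",
    "content": [
        {"type": "input_text", "text": "What is shown in this image?"},
        image,
        {"type": "input_file", "file_id": "file-abc123"},  # placeholder file ID
    ],
}
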
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam +from .response_computer_tool_call_output_screenshot_param import ResponseComputerToolCallOutputScreenshotParam + +__all__ = [ + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ItemReference", +] + + +class Message(TypedDict, total=False): + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Optional[str] + """The type of the pending safety check.""" + + message: Optional[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ResponseComputerToolCallOutputScreenshotParam] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: Optional[str] + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Optional[Iterable[ComputerCallOutputAcknowledgedSafetyCheck]] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(TypedDict, total=False): + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: Optional[str] + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. 
Populated when items are + returned via API. + """ + + +class ItemReference(TypedDict, total=False): + id: Required[str] + """The ID of the item to reference.""" + + type: Optional[Literal["item_reference"]] + """The type of item to reference. Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ItemReference, +] diff --git a/src/openai/types/responses/response_input_message_content_list.py b/src/openai/types/responses/response_input_message_content_list.py new file mode 100644 index 0000000000..99b7c10f12 --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import TypeAlias + +from .response_input_content import ResponseInputContent + +__all__ = ["ResponseInputMessageContentList"] + +ResponseInputMessageContentList: TypeAlias = List[ResponseInputContent] diff --git a/src/openai/types/responses/response_input_message_content_list_param.py b/src/openai/types/responses/response_input_message_content_list_param.py new file mode 100644 index 0000000000..080613df0d --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputMessageContentListParam", "ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] + +ResponseInputMessageContentListParam: TypeAlias = List[ResponseInputContentParam] diff --git a/src/openai/types/responses/response_input_message_item.py b/src/openai/types/responses/response_input_message_item.py new file mode 100644 index 0000000000..6a788e7fa4 --- /dev/null +++ b/src/openai/types/responses/response_input_message_item.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_input_message_content_list import ResponseInputMessageContentList + +__all__ = ["ResponseInputMessageItem"] + + +class ResponseInputMessageItem(BaseModel): + id: str + """The unique ID of the message input.""" + + content: ResponseInputMessageContentList + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Literal["user", "system", "developer"] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. 
Always set to `message`.""" diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py new file mode 100644 index 0000000000..b24182697a --- /dev/null +++ b/src/openai/types/responses/response_input_param.py @@ -0,0 +1,134 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam +from .response_computer_tool_call_output_screenshot_param import ResponseComputerToolCallOutputScreenshotParam + +__all__ = [ + "ResponseInputParam", + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ItemReference", +] + + +class Message(TypedDict, total=False): + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Optional[str] + """The type of the pending safety check.""" + + message: Optional[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ResponseComputerToolCallOutputScreenshotParam] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: Optional[str] + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Optional[Iterable[ComputerCallOutputAcknowledgedSafetyCheck]] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. 
+ """ + + +class FunctionCallOutput(TypedDict, total=False): + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: Optional[str] + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +class ItemReference(TypedDict, total=False): + id: Required[str] + """The ID of the item to reference.""" + + type: Optional[Literal["item_reference"]] + """The type of item to reference. Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ItemReference, +] + +ResponseInputParam: TypeAlias = List[ResponseInputItemParam] diff --git a/src/openai/types/responses/response_input_text.py b/src/openai/types/responses/response_input_text.py new file mode 100644 index 0000000000..ba8d1ea18b --- /dev/null +++ b/src/openai/types/responses/response_input_text.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputText"] + + +class ResponseInputText(BaseModel): + text: str + """The text input to the model.""" + + type: Literal["input_text"] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_input_text_param.py b/src/openai/types/responses/response_input_text_param.py new file mode 100644 index 0000000000..f2ba834082 --- /dev/null +++ b/src/openai/types/responses/response_input_text_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputTextParam"] + + +class ResponseInputTextParam(TypedDict, total=False): + text: Required[str] + """The text input to the model.""" + + type: Required[Literal["input_text"]] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_item.py b/src/openai/types/responses/response_item.py new file mode 100644 index 0000000000..dc8d67d0f2 --- /dev/null +++ b/src/openai/types/responses/response_item.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_output_message import ResponseOutputMessage +from .response_computer_tool_call import ResponseComputerToolCall +from .response_input_message_item import ResponseInputMessageItem +from .response_function_web_search import ResponseFunctionWebSearch +from .response_file_search_tool_call import ResponseFileSearchToolCall +from .response_function_tool_call_item import ResponseFunctionToolCallItem +from .response_computer_tool_call_output_item import ResponseComputerToolCallOutputItem +from .response_function_tool_call_output_item import ResponseFunctionToolCallOutputItem + +__all__ = ["ResponseItem"] + +ResponseItem: TypeAlias = Annotated[ + Union[ + ResponseInputMessageItem, + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseComputerToolCall, + ResponseComputerToolCallOutputItem, + ResponseFunctionWebSearch, + ResponseFunctionToolCallItem, + ResponseFunctionToolCallOutputItem, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_item_list.py b/src/openai/types/responses/response_item_list.py new file mode 100644 index 0000000000..b43eacdb51 --- /dev/null +++ b/src/openai/types/responses/response_item_list.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_item import ResponseItem + +__all__ = ["ResponseItemList"] + + +class ResponseItemList(BaseModel): + data: List[ResponseItem] + """A list of items used to generate this response.""" + + first_id: str + """The ID of the first item in the list.""" + + has_more: bool + """Whether there are more items available.""" + + last_id: str + """The ID of the last item in the list.""" + + object: Literal["list"] + """The type of object returned, must be `list`.""" diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py new file mode 100644 index 0000000000..f1e9693195 --- /dev/null +++ b/src/openai/types/responses/response_output_item.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_output_message import ResponseOutputMessage +from .response_reasoning_item import ResponseReasoningItem +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_file_search_tool_call import ResponseFileSearchToolCall + +__all__ = ["ResponseOutputItem"] + +ResponseOutputItem: TypeAlias = Annotated[ + Union[ + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseComputerToolCall, + ResponseReasoningItem, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_output_item_added_event.py b/src/openai/types/responses/response_output_item_added_event.py new file mode 100644 index 0000000000..7344fb9a6c --- /dev/null +++ b/src/openai/types/responses/response_output_item_added_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
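
Both ResponseItem and ResponseOutputItem are discriminated unions on the `type` field, so items deserialize to the concrete models above and can be narrowed with isinstance; a small sketch:

from openai.types.responses.response_item import ResponseItem
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_function_web_search import ResponseFunctionWebSearch

def describe(item: ResponseItem) -> str:
    # Every member of the union carries a `type` literal; isinstance narrows it.
    if isinstance(item, ResponseOutputMessage):
        return f"assistant message ({item.status})"
    if isinstance(item, ResponseFunctionWebSearch):
        return f"web search call {item.id}: {item.status}"
    return item.type
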
+ +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + item: ResponseOutputItem + """The output item that was added.""" + + output_index: int + """The index of the output item that was added.""" + + type: Literal["response.output_item.added"] + """The type of the event. Always `response.output_item.added`.""" diff --git a/src/openai/types/responses/response_output_item_done_event.py b/src/openai/types/responses/response_output_item_done_event.py new file mode 100644 index 0000000000..a0a871a019 --- /dev/null +++ b/src/openai/types/responses/response_output_item_done_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + item: ResponseOutputItem + """The output item that was marked done.""" + + output_index: int + """The index of the output item that was marked done.""" + + type: Literal["response.output_item.done"] + """The type of the event. Always `response.output_item.done`.""" diff --git a/src/openai/types/responses/response_output_message.py b/src/openai/types/responses/response_output_message.py new file mode 100644 index 0000000000..3864aa2111 --- /dev/null +++ b/src/openai/types/responses/response_output_message.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseOutputMessage", "Content"] + +Content: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseOutputMessage(BaseModel): + id: str + """The unique ID of the output message.""" + + content: List[Content] + """The content of the output message.""" + + role: Literal["assistant"] + """The role of the output message. Always `assistant`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Literal["message"] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_message_param.py b/src/openai/types/responses/response_output_message_param.py new file mode 100644 index 0000000000..46cbbd20de --- /dev/null +++ b/src/openai/types/responses/response_output_message_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
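
Since a ResponseOutputMessage's content is a list of output_text and refusal parts, pulling the visible text out is a short filter; a sketch over the models above (ResponseOutputText itself is defined just below in this diff):

from openai.types.responses.response_output_text import ResponseOutputText
from openai.types.responses.response_output_message import ResponseOutputMessage

def message_text(message: ResponseOutputMessage) -> str:
    # Join the output_text parts and skip any refusal parts.
    return "".join(part.text for part in message.content if isinstance(part, ResponseOutputText))
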
+ +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .response_output_text_param import ResponseOutputTextParam +from .response_output_refusal_param import ResponseOutputRefusalParam + +__all__ = ["ResponseOutputMessageParam", "Content"] + +Content: TypeAlias = Union[ResponseOutputTextParam, ResponseOutputRefusalParam] + + +class ResponseOutputMessageParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the output message.""" + + content: Required[Iterable[Content]] + """The content of the output message.""" + + role: Required[Literal["assistant"]] + """The role of the output message. Always `assistant`.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Required[Literal["message"]] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_refusal.py b/src/openai/types/responses/response_output_refusal.py new file mode 100644 index 0000000000..eba581070d --- /dev/null +++ b/src/openai/types/responses/response_output_refusal.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseOutputRefusal"] + + +class ResponseOutputRefusal(BaseModel): + refusal: str + """The refusal explanation from the model.""" + + type: Literal["refusal"] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_refusal_param.py b/src/openai/types/responses/response_output_refusal_param.py new file mode 100644 index 0000000000..53140a6080 --- /dev/null +++ b/src/openai/types/responses/response_output_refusal_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseOutputRefusalParam"] + + +class ResponseOutputRefusalParam(TypedDict, total=False): + refusal: Required[str] + """The refusal explanation from the model.""" + + type: Required[Literal["refusal"]] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_text.py b/src/openai/types/responses/response_output_text.py new file mode 100644 index 0000000000..fa653cd1af --- /dev/null +++ b/src/openai/types/responses/response_output_text.py @@ -0,0 +1,64 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseOutputText", "Annotation", "AnnotationFileCitation", "AnnotationURLCitation", "AnnotationFilePath"] + + +class AnnotationFileCitation(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_citation"] + """The type of the file citation. 
Always `file_citation`.""" + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url: str + """The URL of the web resource.""" + + +class AnnotationFilePath(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_path"] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Annotated[ + Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath], PropertyInfo(discriminator="type") +] + + +class ResponseOutputText(BaseModel): + annotations: List[Annotation] + """The annotations of the text output.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" diff --git a/src/openai/types/responses/response_output_text_param.py b/src/openai/types/responses/response_output_text_param.py new file mode 100644 index 0000000000..1f0967285f --- /dev/null +++ b/src/openai/types/responses/response_output_text_param.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseOutputTextParam", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationFilePath", +] + + +class AnnotationFileCitation(TypedDict, total=False): + file_id: Required[str] + """The ID of the file.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_citation"]] + """The type of the file citation. Always `file_citation`.""" + + +class AnnotationURLCitation(TypedDict, total=False): + end_index: Required[int] + """The index of the last character of the URL citation in the message.""" + + start_index: Required[int] + """The index of the first character of the URL citation in the message.""" + + title: Required[str] + """The title of the web resource.""" + + type: Required[Literal["url_citation"]] + """The type of the URL citation. Always `url_citation`.""" + + url: Required[str] + """The URL of the web resource.""" + + +class AnnotationFilePath(TypedDict, total=False): + file_id: Required[str] + """The ID of the file.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_path"]] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath] + + +class ResponseOutputTextParam(TypedDict, total=False): + annotations: Required[Iterable[Annotation]] + """The annotations of the text output.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. 
Always `output_text`.""" diff --git a/src/openai/types/responses/response_reasoning_item.py b/src/openai/types/responses/response_reasoning_item.py new file mode 100644 index 0000000000..f5da7802f8 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningItem", "Summary"] + + +class Summary(BaseModel): + text: str + """ + A short summary of the reasoning used by the model when generating the response. + """ + + type: Literal["summary_text"] + """The type of the object. Always `summary_text`.""" + + +class ResponseReasoningItem(BaseModel): + id: str + """The unique identifier of the reasoning content.""" + + summary: List[Summary] + """Reasoning text contents.""" + + type: Literal["reasoning"] + """The type of the object. Always `reasoning`.""" + + encrypted_content: Optional[str] = None + """ + The encrypted content of the reasoning item - populated when a response is + generated with `reasoning.encrypted_content` in the `include` parameter. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_reasoning_item_param.py b/src/openai/types/responses/response_reasoning_item_param.py new file mode 100644 index 0000000000..2cfa5312ed --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item_param.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseReasoningItemParam", "Summary"] + + +class Summary(TypedDict, total=False): + text: Required[str] + """ + A short summary of the reasoning used by the model when generating the response. + """ + + type: Required[Literal["summary_text"]] + """The type of the object. Always `summary_text`.""" + + +class ResponseReasoningItemParam(TypedDict, total=False): + id: Required[str] + """The unique identifier of the reasoning content.""" + + summary: Required[Iterable[Summary]] + """Reasoning text contents.""" + + type: Required[Literal["reasoning"]] + """The type of the object. Always `reasoning`.""" + + encrypted_content: Optional[str] + """ + The encrypted content of the reasoning item - populated when a response is + generated with `reasoning.encrypted_content` in the `include` parameter. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_reasoning_summary_part_added_event.py b/src/openai/types/responses/response_reasoning_summary_part_added_event.py new file mode 100644 index 0000000000..fd11520170 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_part_added_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
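
Reasoning items carry their summaries as a list of summary_text parts; a sketch that collects them from a response's output items, using only the model defined above:

from typing import Iterable, List

from openai.types.responses.response_reasoning_item import ResponseReasoningItem

def reasoning_summaries(items: Iterable[object]) -> List[str]:
    # Gather the summary_text entries from any reasoning items present.
    texts: List[str] = []
    for item in items:
        if isinstance(item, ResponseReasoningItem):
            texts.extend(part.text for part in item.summary)
    return texts
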
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryPartAddedEvent", "Part"] + + +class Part(BaseModel): + text: str + """The text of the summary part.""" + + type: Literal["summary_text"] + """The type of the summary part. Always `summary_text`.""" + + +class ResponseReasoningSummaryPartAddedEvent(BaseModel): + item_id: str + """The ID of the item this summary part is associated with.""" + + output_index: int + """The index of the output item this summary part is associated with.""" + + part: Part + """The summary part that was added.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_part.added"] + """The type of the event. Always `response.reasoning_summary_part.added`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_part_done_event.py b/src/openai/types/responses/response_reasoning_summary_part_done_event.py new file mode 100644 index 0000000000..7f30189a49 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_part_done_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryPartDoneEvent", "Part"] + + +class Part(BaseModel): + text: str + """The text of the summary part.""" + + type: Literal["summary_text"] + """The type of the summary part. Always `summary_text`.""" + + +class ResponseReasoningSummaryPartDoneEvent(BaseModel): + item_id: str + """The ID of the item this summary part is associated with.""" + + output_index: int + """The index of the output item this summary part is associated with.""" + + part: Part + """The completed summary part.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_part.done"] + """The type of the event. Always `response.reasoning_summary_part.done`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_text_delta_event.py b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py new file mode 100644 index 0000000000..6d0cbd8265 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryTextDeltaEvent"] + + +class ResponseReasoningSummaryTextDeltaEvent(BaseModel): + delta: str + """The text delta that was added to the summary.""" + + item_id: str + """The ID of the item this summary text delta is associated with.""" + + output_index: int + """The index of the output item this summary text delta is associated with.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_text.delta"] + """The type of the event. Always `response.reasoning_summary_text.delta`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_text_done_event.py b/src/openai/types/responses/response_reasoning_summary_text_done_event.py new file mode 100644 index 0000000000..15b894c75b --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_text_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryTextDoneEvent"] + + +class ResponseReasoningSummaryTextDoneEvent(BaseModel): + item_id: str + """The ID of the item this summary text is associated with.""" + + output_index: int + """The index of the output item this summary text is associated with.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + text: str + """The full text of the completed reasoning summary.""" + + type: Literal["response.reasoning_summary_text.done"] + """The type of the event. Always `response.reasoning_summary_text.done`.""" diff --git a/src/openai/types/responses/response_refusal_delta_event.py b/src/openai/types/responses/response_refusal_delta_event.py new file mode 100644 index 0000000000..04dcdf1c8c --- /dev/null +++ b/src/openai/types/responses/response_refusal_delta_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDeltaEvent"] + + +class ResponseRefusalDeltaEvent(BaseModel): + content_index: int + """The index of the content part that the refusal text is added to.""" + + delta: str + """The refusal text that is added.""" + + item_id: str + """The ID of the output item that the refusal text is added to.""" + + output_index: int + """The index of the output item that the refusal text is added to.""" + + type: Literal["response.refusal.delta"] + """The type of the event. Always `response.refusal.delta`.""" diff --git a/src/openai/types/responses/response_refusal_done_event.py b/src/openai/types/responses/response_refusal_done_event.py new file mode 100644 index 0000000000..a9b6f4b055 --- /dev/null +++ b/src/openai/types/responses/response_refusal_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDoneEvent"] + + +class ResponseRefusalDoneEvent(BaseModel): + content_index: int + """The index of the content part that the refusal text is finalized.""" + + item_id: str + """The ID of the output item that the refusal text is finalized.""" + + output_index: int + """The index of the output item that the refusal text is finalized.""" + + refusal: str + """The refusal text that is finalized.""" + + type: Literal["response.refusal.done"] + """The type of the event. Always `response.refusal.done`.""" diff --git a/src/openai/types/responses/response_retrieve_params.py b/src/openai/types/responses/response_retrieve_params.py new file mode 100644 index 0000000000..137bf4dcee --- /dev/null +++ b/src/openai/types/responses/response_retrieve_params.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import TypedDict + +from .response_includable import ResponseIncludable + +__all__ = ["ResponseRetrieveParams"] + + +class ResponseRetrieveParams(TypedDict, total=False): + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for Response creation above for more information. 
+ """ diff --git a/src/openai/types/responses/response_status.py b/src/openai/types/responses/response_status.py new file mode 100644 index 0000000000..934d17cda3 --- /dev/null +++ b/src/openai/types/responses/response_status.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseStatus"] + +ResponseStatus: TypeAlias = Literal["completed", "failed", "in_progress", "incomplete"] diff --git a/src/openai/types/responses/response_stream_event.py b/src/openai/types/responses/response_stream_event.py new file mode 100644 index 0000000000..07c18bd217 --- /dev/null +++ b/src/openai/types/responses/response_stream_event.py @@ -0,0 +1,86 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_error_event import ResponseErrorEvent +from .response_failed_event import ResponseFailedEvent +from .response_created_event import ResponseCreatedEvent +from .response_completed_event import ResponseCompletedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_incomplete_event import ResponseIncompleteEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent +from .response_refusal_done_event import ResponseRefusalDoneEvent +from .response_refusal_delta_event import ResponseRefusalDeltaEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .response_web_search_call_completed_event import ResponseWebSearchCallCompletedEvent +from .response_web_search_call_searching_event import ResponseWebSearchCallSearchingEvent +from .response_file_search_call_completed_event import ResponseFileSearchCallCompletedEvent +from .response_file_search_call_searching_event import ResponseFileSearchCallSearchingEvent +from .response_reasoning_summary_part_done_event import ResponseReasoningSummaryPartDoneEvent +from .response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent +from .response_web_search_call_in_progress_event import ResponseWebSearchCallInProgressEvent +from .response_file_search_call_in_progress_event import ResponseFileSearchCallInProgressEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_reasoning_summary_part_added_event import ResponseReasoningSummaryPartAddedEvent +from .response_reasoning_summary_text_delta_event import ResponseReasoningSummaryTextDeltaEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .response_code_interpreter_call_code_done_event import ResponseCodeInterpreterCallCodeDoneEvent +from 
.response_code_interpreter_call_completed_event import ResponseCodeInterpreterCallCompletedEvent +from .response_code_interpreter_call_code_delta_event import ResponseCodeInterpreterCallCodeDeltaEvent +from .response_code_interpreter_call_in_progress_event import ResponseCodeInterpreterCallInProgressEvent +from .response_code_interpreter_call_interpreting_event import ResponseCodeInterpreterCallInterpretingEvent + +__all__ = ["ResponseStreamEvent"] + +ResponseStreamEvent: TypeAlias = Annotated[ + Union[ + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCompletedEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseErrorEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseInProgressEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseTextAnnotationDeltaEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_text_annotation_delta_event.py b/src/openai/types/responses/response_text_annotation_delta_event.py new file mode 100644 index 0000000000..4f2582282a --- /dev/null +++ b/src/openai/types/responses/response_text_annotation_delta_event.py @@ -0,0 +1,79 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseTextAnnotationDeltaEvent", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationFilePath", +] + + +class AnnotationFileCitation(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_citation"] + """The type of the file citation. Always `file_citation`.""" + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url: str + """The URL of the web resource.""" + + +class AnnotationFilePath(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_path"] + """The type of the file path. 
Always `file_path`.""" + + +Annotation: TypeAlias = Annotated[ + Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath], PropertyInfo(discriminator="type") +] + + +class ResponseTextAnnotationDeltaEvent(BaseModel): + annotation: Annotation + """A citation to a file.""" + + annotation_index: int + """The index of the annotation that was added.""" + + content_index: int + """The index of the content part that the text annotation was added to.""" + + item_id: str + """The ID of the output item that the text annotation was added to.""" + + output_index: int + """The index of the output item that the text annotation was added to.""" + + type: Literal["response.output_text.annotation.added"] + """The type of the event. Always `response.output_text.annotation.added`.""" diff --git a/src/openai/types/responses/response_text_config.py b/src/openai/types/responses/response_text_config.py new file mode 100644 index 0000000000..a1894a9176 --- /dev/null +++ b/src/openai/types/responses/response_text_config.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .response_format_text_config import ResponseFormatTextConfig + +__all__ = ["ResponseTextConfig"] + + +class ResponseTextConfig(BaseModel): + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ diff --git a/src/openai/types/responses/response_text_config_param.py b/src/openai/types/responses/response_text_config_param.py new file mode 100644 index 0000000000..aec064bf89 --- /dev/null +++ b/src/openai/types/responses/response_text_config_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .response_format_text_config_param import ResponseFormatTextConfigParam + +__all__ = ["ResponseTextConfigParam"] + + +class ResponseTextConfigParam(TypedDict, total=False): + format: ResponseFormatTextConfigParam + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. 
+ """ diff --git a/src/openai/types/responses/response_text_delta_event.py b/src/openai/types/responses/response_text_delta_event.py new file mode 100644 index 0000000000..751a5e2a19 --- /dev/null +++ b/src/openai/types/responses/response_text_delta_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + content_index: int + """The index of the content part that the text delta was added to.""" + + delta: str + """The text delta that was added.""" + + item_id: str + """The ID of the output item that the text delta was added to.""" + + output_index: int + """The index of the output item that the text delta was added to.""" + + type: Literal["response.output_text.delta"] + """The type of the event. Always `response.output_text.delta`.""" diff --git a/src/openai/types/responses/response_text_done_event.py b/src/openai/types/responses/response_text_done_event.py new file mode 100644 index 0000000000..9b5c5e020c --- /dev/null +++ b/src/openai/types/responses/response_text_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + content_index: int + """The index of the content part that the text content is finalized.""" + + item_id: str + """The ID of the output item that the text content is finalized.""" + + output_index: int + """The index of the output item that the text content is finalized.""" + + text: str + """The text content that is finalized.""" + + type: Literal["response.output_text.done"] + """The type of the event. Always `response.output_text.done`.""" diff --git a/src/openai/types/responses/response_usage.py b/src/openai/types/responses/response_usage.py new file mode 100644 index 0000000000..52b93ac578 --- /dev/null +++ b/src/openai/types/responses/response_usage.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["ResponseUsage", "InputTokensDetails", "OutputTokensDetails"] + + +class InputTokensDetails(BaseModel): + cached_tokens: int + """The number of tokens that were retrieved from the cache. + + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + """ + + +class OutputTokensDetails(BaseModel): + reasoning_tokens: int + """The number of reasoning tokens.""" + + +class ResponseUsage(BaseModel): + input_tokens: int + """The number of input tokens.""" + + input_tokens_details: InputTokensDetails + """A detailed breakdown of the input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: OutputTokensDetails + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" diff --git a/src/openai/types/responses/response_web_search_call_completed_event.py b/src/openai/types/responses/response_web_search_call_completed_event.py new file mode 100644 index 0000000000..76f26766a1 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_completed_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallCompletedEvent"] + + +class ResponseWebSearchCallCompletedEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.completed"] + """The type of the event. Always `response.web_search_call.completed`.""" diff --git a/src/openai/types/responses/response_web_search_call_in_progress_event.py b/src/openai/types/responses/response_web_search_call_in_progress_event.py new file mode 100644 index 0000000000..681ce6d94b --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_in_progress_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallInProgressEvent"] + + +class ResponseWebSearchCallInProgressEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.in_progress"] + """The type of the event. Always `response.web_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_web_search_call_searching_event.py b/src/openai/types/responses/response_web_search_call_searching_event.py new file mode 100644 index 0000000000..c885d98918 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_searching_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallSearchingEvent"] + + +class ResponseWebSearchCallSearchingEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.searching"] + """The type of the event. Always `response.web_search_call.searching`.""" diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py new file mode 100644 index 0000000000..d96abdbe5a --- /dev/null +++ b/src/openai/types/responses/tool.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .computer_tool import ComputerTool +from .function_tool import FunctionTool +from .web_search_tool import WebSearchTool +from .file_search_tool import FileSearchTool + +__all__ = ["Tool"] + +Tool: TypeAlias = Annotated[ + Union[FileSearchTool, FunctionTool, WebSearchTool, ComputerTool], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/responses/tool_choice_function.py b/src/openai/types/responses/tool_choice_function.py new file mode 100644 index 0000000000..8d2a4f2822 --- /dev/null +++ b/src/openai/types/responses/tool_choice_function.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
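
The Tool union above is what the create call's `tools` parameter accepts; a hedged sketch enabling the hosted web search tool, assuming the client.responses.create() method and Response.output list added elsewhere in this changeset (the WebSearchToolParam and UserLocation fields are defined further down in this diff; the city is a placeholder):

from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4o",  # placeholder model name
    input="What happened in the news today?",
    tools=[
        {
            "type": "web_search_preview",
            "user_location": {"type": "approximate", "city": "San Francisco"},
        }
    ],
)

for item in response.output:
    if item.type == "web_search_call":  # ResponseFunctionWebSearch, defined earlier in this diff
        print("web search:", item.status)
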
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ToolChoiceFunction"]
+
+
+class ToolChoiceFunction(BaseModel):
+    name: str
+    """The name of the function to call."""
+
+    type: Literal["function"]
+    """For function calling, the type is always `function`."""
diff --git a/src/openai/types/responses/tool_choice_function_param.py b/src/openai/types/responses/tool_choice_function_param.py
new file mode 100644
index 0000000000..910537fd97
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_function_param.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceFunctionParam"]
+
+
+class ToolChoiceFunctionParam(TypedDict, total=False):
+    name: Required[str]
+    """The name of the function to call."""
+
+    type: Required[Literal["function"]]
+    """For function calling, the type is always `function`."""
diff --git a/src/openai/types/responses/tool_choice_options.py b/src/openai/types/responses/tool_choice_options.py
new file mode 100644
index 0000000000..c200db54e1
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_options.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["ToolChoiceOptions"]
+
+ToolChoiceOptions: TypeAlias = Literal["none", "auto", "required"]
diff --git a/src/openai/types/responses/tool_choice_types.py b/src/openai/types/responses/tool_choice_types.py
new file mode 100644
index 0000000000..4942808f14
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_types.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ToolChoiceTypes"]
+
+
+class ToolChoiceTypes(BaseModel):
+    type: Literal["file_search", "web_search_preview", "computer_use_preview", "web_search_preview_2025_03_11"]
+    """The type of hosted tool the model should use.
+
+    Learn more about
+    [built-in tools](https://platform.openai.com/docs/guides/tools).
+
+    Allowed values are:
+
+    - `file_search`
+    - `web_search_preview`
+    - `computer_use_preview`
+    """
diff --git a/src/openai/types/responses/tool_choice_types_param.py b/src/openai/types/responses/tool_choice_types_param.py
new file mode 100644
index 0000000000..b14f2a9eb0
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_types_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceTypesParam"]
+
+
+class ToolChoiceTypesParam(TypedDict, total=False):
+    type: Required[
+        Literal["file_search", "web_search_preview", "computer_use_preview", "web_search_preview_2025_03_11"]
+    ]
+    """The type of hosted tool the model should use.
+
+    Learn more about
+    [built-in tools](https://platform.openai.com/docs/guides/tools).
+
+    Allowed values are:
+
+    - `file_search`
+    - `web_search_preview`
+    - `computer_use_preview`
+    """
diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py
new file mode 100644
index 0000000000..200c347005
--- /dev/null
+++ b/src/openai/types/responses/tool_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless.
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .computer_tool_param import ComputerToolParam +from .function_tool_param import FunctionToolParam +from .web_search_tool_param import WebSearchToolParam +from .file_search_tool_param import FileSearchToolParam +from ..chat.chat_completion_tool_param import ChatCompletionToolParam + +__all__ = ["ToolParam"] + +ToolParam: TypeAlias = Union[FileSearchToolParam, FunctionToolParam, WebSearchToolParam, ComputerToolParam] + +ParseableToolParam: TypeAlias = Union[ToolParam, ChatCompletionToolParam] diff --git a/src/openai/types/responses/web_search_tool.py b/src/openai/types/responses/web_search_tool.py new file mode 100644 index 0000000000..a6bf951145 --- /dev/null +++ b/src/openai/types/responses/web_search_tool.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["WebSearchTool", "UserLocation"] + + +class UserLocation(BaseModel): + type: Literal["approximate"] + """The type of location approximation. Always `approximate`.""" + + city: Optional[str] = None + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] = None + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] = None + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] = None + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchTool(BaseModel): + type: Literal["web_search_preview", "web_search_preview_2025_03_11"] + """The type of the web search tool. + + One of `web_search_preview` or `web_search_preview_2025_03_11`. + """ + + search_context_size: Optional[Literal["low", "medium", "high"]] = None + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] = None + """The user's location.""" diff --git a/src/openai/types/responses/web_search_tool_param.py b/src/openai/types/responses/web_search_tool_param.py new file mode 100644 index 0000000000..d0335c01a3 --- /dev/null +++ b/src/openai/types/responses/web_search_tool_param.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["WebSearchToolParam", "UserLocation"] + + +class UserLocation(TypedDict, total=False): + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" + + city: Optional[str] + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. 
+ """ + + +class WebSearchToolParam(TypedDict, total=False): + type: Required[Literal["web_search_preview", "web_search_preview_2025_03_11"]] + """The type of the web search tool. + + One of `web_search_preview` or `web_search_preview_2025_03_11`. + """ + + search_context_size: Literal["low", "medium", "high"] + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] + """The user's location.""" diff --git a/src/openai/types/shared/__init__.py b/src/openai/types/shared/__init__.py index 05bc4ff9ba..6ad0ed5e01 100644 --- a/src/openai/types/shared/__init__.py +++ b/src/openai/types/shared/__init__.py @@ -1,4 +1,16 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .all_models import AllModels as AllModels +from .chat_model import ChatModel as ChatModel +from .error_object import ErrorObject as ErrorObject +from .compound_filter import CompoundFilter as CompoundFilter +from .responses_model import ResponsesModel as ResponsesModel +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py new file mode 100644 index 0000000000..db8410773e --- /dev/null +++ b/src/openai/types/shared/all_models.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .chat_model import ChatModel + +__all__ = ["AllModels"] + +AllModels: TypeAlias = Union[ + str, ChatModel, Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"] +] diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py new file mode 100644 index 0000000000..4869cd325c --- /dev/null +++ b/src/openai/types/shared/chat_model.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared/comparison_filter.py b/src/openai/types/shared/comparison_filter.py new file mode 100644 index 0000000000..2ec2651ff2 --- /dev/null +++ b/src/openai/types/shared/comparison_filter.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(BaseModel): + key: str + """The key to compare against the value.""" + + type: Literal["eq", "ne", "gt", "gte", "lt", "lte"] + """Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + """ + + value: Union[str, float, bool] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared/compound_filter.py b/src/openai/types/shared/compound_filter.py new file mode 100644 index 0000000000..3aefa43647 --- /dev/null +++ b/src/openai/types/shared/compound_filter.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(BaseModel): + filters: List[Filter] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. + """ + + type: Literal["and", "or"] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared/error_object.py b/src/openai/types/shared/error_object.py new file mode 100644 index 0000000000..32d7045e00 --- /dev/null +++ b/src/openai/types/shared/error_object.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
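`CompoundFilter` combines `ComparisonFilter` entries (or further nested filters, typed loosely as `object`) under a single `and`/`or` operation. A construction sketch with made-up keys and values:

```python
from openai.types.shared.comparison_filter import ComparisonFilter
from openai.types.shared.compound_filter import CompoundFilter

# The attribute keys and values below are illustrative, not defined by this diff.
recent_california_docs = CompoundFilter(
    type="and",
    filters=[
        ComparisonFilter(key="region", type="eq", value="California"),
        ComparisonFilter(key="score", type="gte", value=0.75),
    ],
)
```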
+ +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["ErrorObject"] + + +class ErrorObject(BaseModel): + code: Optional[str] = None + + message: str + + param: Optional[str] = None + + type: str diff --git a/src/openai/types/shared/function_definition.py b/src/openai/types/shared/function_definition.py index bfcee50c85..06baa23170 100644 --- a/src/openai/types/shared/function_definition.py +++ b/src/openai/types/shared/function_definition.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional @@ -16,20 +16,28 @@ class FunctionDefinition(BaseModel): of 64. """ - parameters: FunctionParameters + description: Optional[str] = None + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: Optional[FunctionParameters] = None """The parameters the functions accepts, described as a JSON Schema object. - See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. - To describe a function that accepts no parameters, provide the value - `{"type": "object", "properties": {}}`. + Omitting `parameters` defines a function with an empty parameter list. """ - description: Optional[str] = None - """ - A description of what the function does, used by the model to choose when and - how to call the function. + strict: Optional[bool] = None + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](docs/guides/function-calling). """ diff --git a/src/openai/types/shared/function_parameters.py b/src/openai/types/shared/function_parameters.py index 405c2d14cc..a3d83e3496 100644 --- a/src/openai/types/shared/function_parameters.py +++ b/src/openai/types/shared/function_parameters.py @@ -1,7 +1,8 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared/metadata.py b/src/openai/types/shared/metadata.py new file mode 100644 index 0000000000..0da88c679c --- /dev/null +++ b/src/openai/types/shared/metadata.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py new file mode 100644 index 0000000000..107aab2e4a --- /dev/null +++ b/src/openai/types/shared/reasoning.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
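With the updated `FunctionDefinition`, `parameters` is now optional and the new `strict` flag opts in to exact schema adherence. A minimal sketch of a strict definition; the function name and schema are illustrative:

```python
from openai.types.shared.function_definition import FunctionDefinition

# Illustrative function; with strict=True the model must match the schema exactly.
get_weather = FunctionDefinition(
    name="get_weather",
    description="Look up the current weather for a city.",
    parameters={
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
        "additionalProperties": False,
    },
    strict=True,
)
```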
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(BaseModel): + effort: Optional[ReasoningEffort] = None + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None + """**Deprecated:** use `summary` instead. + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] = None + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. + """ diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py new file mode 100644 index 0000000000..ace21b67e4 --- /dev/null +++ b/src/openai/types/shared/reasoning_effort.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["low", "medium", "high"]] diff --git a/src/openai/types/shared/response_format_json_object.py b/src/openai/types/shared/response_format_json_object.py new file mode 100644 index 0000000000..2aaa5dbdfe --- /dev/null +++ b/src/openai/types/shared/response_format_json_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(BaseModel): + type: Literal["json_object"] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared/response_format_json_schema.py b/src/openai/types/shared/response_format_json_schema.py new file mode 100644 index 0000000000..c7924446f4 --- /dev/null +++ b/src/openai/types/shared/response_format_json_schema.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(BaseModel): + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema_: Optional[Dict[str, object]] = FieldInfo(alias="schema", default=None) + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. 
If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(BaseModel): + json_schema: JSONSchema + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" diff --git a/src/openai/types/shared/response_format_text.py b/src/openai/types/shared/response_format_text.py new file mode 100644 index 0000000000..f0c8cfb700 --- /dev/null +++ b/src/openai/types/shared/response_format_text.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(BaseModel): + type: Literal["text"] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py new file mode 100644 index 0000000000..85f154fd84 --- /dev/null +++ b/src/openai/types/shared/responses_model.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .chat_model import ChatModel + +__all__ = ["ResponsesModel"] + +ResponsesModel: TypeAlias = Union[ + str, ChatModel, Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"] +] diff --git a/src/openai/types/shared_params/__init__.py b/src/openai/types/shared_params/__init__.py index 05bc4ff9ba..8894710807 100644 --- a/src/openai/types/shared_params/__init__.py +++ b/src/openai/types/shared_params/__init__.py @@ -1,4 +1,14 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .chat_model import ChatModel as ChatModel +from .compound_filter import CompoundFilter as CompoundFilter +from .responses_model import ResponsesModel as ResponsesModel +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py new file mode 100644 index 0000000000..99e082fc11 --- /dev/null +++ b/src/openai/types/shared_params/chat_model.py @@ -0,0 +1,63 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
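Note that the wire field `schema` is surfaced as the `schema_` attribute via a Pydantic alias, since `schema` would shadow a `BaseModel` member. A sketch of validating a payload and reading that field back, assuming Pydantic v2's `model_validate`; the payload itself is illustrative:

```python
from openai.types.shared.response_format_json_schema import ResponseFormatJSONSchema

# Illustrative payload; the wire key "schema" populates the aliased `schema_` attribute.
response_format = ResponseFormatJSONSchema.model_validate(
    {
        "type": "json_schema",
        "json_schema": {
            "name": "city_info",
            "strict": True,
            "schema": {"type": "object", "properties": {"city": {"type": "string"}}},
        },
    }
)
print(response_format.json_schema.schema_)  # the JSON Schema dict
```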
+ +from __future__ import annotations + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared_params/comparison_filter.py b/src/openai/types/shared_params/comparison_filter.py new file mode 100644 index 0000000000..38edd315ed --- /dev/null +++ b/src/openai/types/shared_params/comparison_filter.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(TypedDict, total=False): + key: Required[str] + """The key to compare against the value.""" + + type: Required[Literal["eq", "ne", "gt", "gte", "lt", "lte"]] + """Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + """ + + value: Required[Union[str, float, bool]] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared_params/compound_filter.py b/src/openai/types/shared_params/compound_filter.py new file mode 100644 index 0000000000..d12e9b1bda --- /dev/null +++ b/src/openai/types/shared_params/compound_filter.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(TypedDict, total=False): + filters: Required[Iterable[Filter]] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. 
+ """ + + type: Required[Literal["and", "or"]] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared_params/function_definition.py b/src/openai/types/shared_params/function_definition.py index 6bb6fa6ff2..d45ec13f1e 100644 --- a/src/openai/types/shared_params/function_definition.py +++ b/src/openai/types/shared_params/function_definition.py @@ -1,10 +1,11 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +from typing import Optional from typing_extensions import Required, TypedDict -from ...types import shared_params +from .function_parameters import FunctionParameters __all__ = ["FunctionDefinition"] @@ -17,20 +18,28 @@ class FunctionDefinition(TypedDict, total=False): of 64. """ - parameters: Required[shared_params.FunctionParameters] + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. - See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. - To describe a function that accepts no parameters, provide the value - `{"type": "object", "properties": {}}`. + Omitting `parameters` defines a function with an empty parameter list. """ - description: str - """ - A description of what the function does, used by the model to choose when and - how to call the function. + strict: Optional[bool] + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](docs/guides/function-calling). """ diff --git a/src/openai/types/shared_params/function_parameters.py b/src/openai/types/shared_params/function_parameters.py index a405f6b2e2..45fc742d3b 100644 --- a/src/openai/types/shared_params/function_parameters.py +++ b/src/openai/types/shared_params/function_parameters.py @@ -1,9 +1,10 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared_params/metadata.py b/src/openai/types/shared_params/metadata.py new file mode 100644 index 0000000000..821650b48b --- /dev/null +++ b/src/openai/types/shared_params/metadata.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py new file mode 100644 index 0000000000..73e1a008df --- /dev/null +++ b/src/openai/types/shared_params/reasoning.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(TypedDict, total=False): + effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + generate_summary: Optional[Literal["auto", "concise", "detailed"]] + """**Deprecated:** use `summary` instead. + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. + """ diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py new file mode 100644 index 0000000000..6052c5ae15 --- /dev/null +++ b/src/openai/types/shared_params/reasoning_effort.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["low", "medium", "high"]] diff --git a/src/openai/types/shared_params/response_format_json_object.py b/src/openai/types/shared_params/response_format_json_object.py new file mode 100644 index 0000000000..d4d1deaae5 --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(TypedDict, total=False): + type: Required[Literal["json_object"]] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared_params/response_format_json_schema.py b/src/openai/types/shared_params/response_format_json_schema.py new file mode 100644 index 0000000000..5b0a13ee06 --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_schema.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
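On the request side, `Reasoning` is a TypedDict, so a reasoning configuration is an ordinary dictionary. A one-line sketch (which models accept these fields is outside the scope of this diff):

```python
from openai.types.shared_params.reasoning import Reasoning

# Illustrative configuration; `generate_summary` is deprecated in favor of `summary`.
reasoning: Reasoning = {"effort": "high", "summary": "concise"}
```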
+ +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(TypedDict, total=False): + name: Required[str] + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema: Dict[str, object] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(TypedDict, total=False): + json_schema: Required[JSONSchema] + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Required[Literal["json_schema"]] + """The type of response format being defined. Always `json_schema`.""" diff --git a/src/openai/types/shared_params/response_format_text.py b/src/openai/types/shared_params/response_format_text.py new file mode 100644 index 0000000000..c3ef2b0816 --- /dev/null +++ b/src/openai/types/shared_params/response_format_text.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(TypedDict, total=False): + type: Required[Literal["text"]] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py new file mode 100644 index 0000000000..3bf0e13731 --- /dev/null +++ b/src/openai/types/shared_params/responses_model.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..shared.chat_model import ChatModel + +__all__ = ["ResponsesModel"] + +ResponsesModel: TypeAlias = Union[ + str, ChatModel, Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"] +] diff --git a/src/openai/types/static_file_chunking_strategy.py b/src/openai/types/static_file_chunking_strategy.py new file mode 100644 index 0000000000..cb842442c1 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["StaticFileChunkingStrategy"] + + +class StaticFileChunkingStrategy(BaseModel): + chunk_overlap_tokens: int + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: int + """The maximum number of tokens in each chunk. + + The default value is `800`. 
The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/static_file_chunking_strategy_object.py b/src/openai/types/static_file_chunking_strategy_object.py new file mode 100644 index 0000000000..2a95dce5b3 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel +from .static_file_chunking_strategy import StaticFileChunkingStrategy + +__all__ = ["StaticFileChunkingStrategyObject"] + + +class StaticFileChunkingStrategyObject(BaseModel): + static: StaticFileChunkingStrategy + + type: Literal["static"] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_object_param.py b/src/openai/types/static_file_chunking_strategy_object_param.py new file mode 100644 index 0000000000..0cdf35c0df --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam + +__all__ = ["StaticFileChunkingStrategyObjectParam"] + + +class StaticFileChunkingStrategyObjectParam(TypedDict, total=False): + static: Required[StaticFileChunkingStrategyParam] + + type: Required[Literal["static"]] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_param.py b/src/openai/types/static_file_chunking_strategy_param.py new file mode 100644 index 0000000000..f917ac5647 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["StaticFileChunkingStrategyParam"] + + +class StaticFileChunkingStrategyParam(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/upload.py b/src/openai/types/upload.py new file mode 100644 index 0000000000..914b69a863 --- /dev/null +++ b/src/openai/types/upload.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .file_object import FileObject + +__all__ = ["Upload"] + + +class Upload(BaseModel): + id: str + """The Upload unique identifier, which can be referenced in API endpoints.""" + + bytes: int + """The intended number of bytes to be uploaded.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Upload was created.""" + + expires_at: int + """The Unix timestamp (in seconds) for when the Upload will expire.""" + + filename: str + """The name of the file to be uploaded.""" + + object: Literal["upload"] + """The object type, which is always "upload".""" + + purpose: str + """The intended purpose of the file. 
+ + [Please refer here](https://platform.openai.com/docs/api-reference/files/object#files/object-purpose) + for acceptable values. + """ + + status: Literal["pending", "completed", "cancelled", "expired"] + """The status of the Upload.""" + + file: Optional[FileObject] = None + """The `File` object represents a document that has been uploaded to OpenAI.""" diff --git a/src/openai/types/upload_complete_params.py b/src/openai/types/upload_complete_params.py new file mode 100644 index 0000000000..cce568d5c6 --- /dev/null +++ b/src/openai/types/upload_complete_params.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +__all__ = ["UploadCompleteParams"] + + +class UploadCompleteParams(TypedDict, total=False): + part_ids: Required[List[str]] + """The ordered list of Part IDs.""" + + md5: str + """ + The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + """ diff --git a/src/openai/types/upload_create_params.py b/src/openai/types/upload_create_params.py new file mode 100644 index 0000000000..2ebabe6c66 --- /dev/null +++ b/src/openai/types/upload_create_params.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .file_purpose import FilePurpose + +__all__ = ["UploadCreateParams"] + + +class UploadCreateParams(TypedDict, total=False): + bytes: Required[int] + """The number of bytes in the file you are uploading.""" + + filename: Required[str] + """The name of the file to upload.""" + + mime_type: Required[str] + """The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + """ + + purpose: Required[FilePurpose] + """The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + """ diff --git a/src/openai/types/uploads/__init__.py b/src/openai/types/uploads/__init__.py new file mode 100644 index 0000000000..41deb0ab4b --- /dev/null +++ b/src/openai/types/uploads/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .upload_part import UploadPart as UploadPart +from .part_create_params import PartCreateParams as PartCreateParams diff --git a/src/openai/types/uploads/part_create_params.py b/src/openai/types/uploads/part_create_params.py new file mode 100644 index 0000000000..9851ca41e9 --- /dev/null +++ b/src/openai/types/uploads/part_create_params.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ..._types import FileTypes + +__all__ = ["PartCreateParams"] + + +class PartCreateParams(TypedDict, total=False): + data: Required[FileTypes] + """The chunk of bytes for this Part.""" diff --git a/src/openai/types/uploads/upload_part.py b/src/openai/types/uploads/upload_part.py new file mode 100644 index 0000000000..e09621d8f9 --- /dev/null +++ b/src/openai/types/uploads/upload_part.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["UploadPart"] + + +class UploadPart(BaseModel): + id: str + """The upload Part unique identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Part was created.""" + + object: Literal["upload.part"] + """The object type, which is always `upload.part`.""" + + upload_id: str + """The ID of the Upload object that this Part was added to.""" diff --git a/src/openai/types/vector_store.py b/src/openai/types/vector_store.py new file mode 100644 index 0000000000..2473a442d2 --- /dev/null +++ b/src/openai/types/vector_store.py @@ -0,0 +1,82 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .shared.metadata import Metadata + +__all__ = ["VectorStore", "FileCounts", "ExpiresAfter"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that were cancelled.""" + + completed: int + """The number of files that have been successfully processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class ExpiresAfter(BaseModel): + anchor: Literal["last_active_at"] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: int + """The number of days after the anchor time that the vector store will expire.""" + + +class VectorStore(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store was created.""" + + file_counts: FileCounts + + last_active_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store was last active.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the vector store.""" + + object: Literal["vector_store"] + """The object type, which is always `vector_store`.""" + + status: Literal["expired", "in_progress", "completed"] + """ + The status of the vector store, which can be either `expired`, `in_progress`, or + `completed`. A status of `completed` indicates that the vector store is ready + for use. + """ + + usage_bytes: int + """The total number of bytes used by the files in the vector store.""" + + expires_after: Optional[ExpiresAfter] = None + """The expiration policy for a vector store.""" + + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the vector store will expire.""" diff --git a/src/openai/types/vector_store_create_params.py b/src/openai/types/vector_store_create_params.py new file mode 100644 index 0000000000..365d0936b1 --- /dev/null +++ b/src/openai/types/vector_store_create_params.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
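Taken together, `Upload`, `UploadPart`, and the create/complete params describe a multipart upload flow: create an Upload, add one or more Parts, then complete it with the ordered part IDs. A hedged sketch, assuming a configured client that exposes `uploads.create`, `uploads.parts.create`, and `uploads.complete` (method names and values are assumptions for illustration, and the file here fits in a single part):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

data = b'{"prompt": "...", "completion": "..."}\n'  # illustrative file contents

upload = client.uploads.create(
    bytes=len(data),
    filename="training.jsonl",
    mime_type="text/jsonl",  # must be a supported MIME type for the chosen purpose
    purpose="fine-tune",
)
part = client.uploads.parts.create(upload_id=upload.id, data=data)
completed = client.uploads.complete(upload_id=upload.id, part_ids=[part.id])
print(completed.status)  # expected to be "completed" once the parts are assembled
```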
+ +from __future__ import annotations + +from typing import List, Optional +from typing_extensions import Literal, Required, TypedDict + +from .shared_params.metadata import Metadata +from .file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["VectorStoreCreateParams", "ExpiresAfter"] + + +class VectorStoreCreateParams(TypedDict, total=False): + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + + expires_after: ExpiresAfter + """The expiration policy for a vector store.""" + + file_ids: List[str] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/src/openai/types/vector_store_deleted.py b/src/openai/types/vector_store_deleted.py new file mode 100644 index 0000000000..dfac9ce8bd --- /dev/null +++ b/src/openai/types/vector_store_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorStoreDeleted"] + + +class VectorStoreDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.deleted"] diff --git a/src/openai/types/beta/assistants/file_list_params.py b/src/openai/types/vector_store_list_params.py similarity index 79% rename from src/openai/types/beta/assistants/file_list_params.py rename to src/openai/types/vector_store_list_params.py index 397e35a0d1..e26ff90a85 100644 --- a/src/openai/types/beta/assistants/file_list_params.py +++ b/src/openai/types/vector_store_list_params.py @@ -1,13 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing_extensions import Literal, TypedDict -__all__ = ["FileListParams"] +__all__ = ["VectorStoreListParams"] -class FileListParams(TypedDict, total=False): +class VectorStoreListParams(TypedDict, total=False): after: str """A cursor for use in pagination. @@ -21,7 +21,7 @@ class FileListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. 
""" diff --git a/src/openai/types/vector_store_search_params.py b/src/openai/types/vector_store_search_params.py new file mode 100644 index 0000000000..17573d0f61 --- /dev/null +++ b/src/openai/types/vector_store_search_params.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .shared_params.compound_filter import CompoundFilter +from .shared_params.comparison_filter import ComparisonFilter + +__all__ = ["VectorStoreSearchParams", "Filters", "RankingOptions"] + + +class VectorStoreSearchParams(TypedDict, total=False): + query: Required[Union[str, List[str]]] + """A query string for a search""" + + filters: Filters + """A filter to apply based on file attributes.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: RankingOptions + """Ranking options for search.""" + + rewrite_query: bool + """Whether to rewrite the natural language query for vector search.""" + + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(TypedDict, total=False): + ranker: Literal["auto", "default-2024-11-15"] + + score_threshold: float diff --git a/src/openai/types/vector_store_search_response.py b/src/openai/types/vector_store_search_response.py new file mode 100644 index 0000000000..d78b71bfba --- /dev/null +++ b/src/openai/types/vector_store_search_response.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorStoreSearchResponse", "Content"] + + +class Content(BaseModel): + text: str + """The text content returned from search.""" + + type: Literal["text"] + """The type of content.""" + + +class VectorStoreSearchResponse(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + content: List[Content] + """Content chunks from the file.""" + + file_id: str + """The ID of the vector store file.""" + + filename: str + """The name of the vector store file.""" + + score: float + """The similarity score for the result.""" diff --git a/src/openai/types/vector_store_update_params.py b/src/openai/types/vector_store_update_params.py new file mode 100644 index 0000000000..4f6ac63963 --- /dev/null +++ b/src/openai/types/vector_store_update_params.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from .shared_params.metadata import Metadata + +__all__ = ["VectorStoreUpdateParams", "ExpiresAfter"] + + +class VectorStoreUpdateParams(TypedDict, total=False): + expires_after: Optional[ExpiresAfter] + """The expiration policy for a vector store.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: Optional[str] + """The name of the vector store.""" + + +class ExpiresAfter(TypedDict, total=False): + anchor: Required[Literal["last_active_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `last_active_at`. + """ + + days: Required[int] + """The number of days after the anchor time that the vector store will expire.""" diff --git a/src/openai/types/vector_stores/__init__.py b/src/openai/types/vector_stores/__init__.py new file mode 100644 index 0000000000..96ce301481 --- /dev/null +++ b/src/openai/types/vector_stores/__init__.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .file_list_params import FileListParams as FileListParams +from .vector_store_file import VectorStoreFile as VectorStoreFile +from .file_create_params import FileCreateParams as FileCreateParams +from .file_update_params import FileUpdateParams as FileUpdateParams +from .file_content_response import FileContentResponse as FileContentResponse +from .vector_store_file_batch import VectorStoreFileBatch as VectorStoreFileBatch +from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams +from .vector_store_file_deleted import VectorStoreFileDeleted as VectorStoreFileDeleted +from .file_batch_list_files_params import FileBatchListFilesParams as FileBatchListFilesParams diff --git a/src/openai/types/vector_stores/file_batch_create_params.py b/src/openai/types/vector_stores/file_batch_create_params.py new file mode 100644 index 0000000000..1a470f757a --- /dev/null +++ b/src/openai/types/vector_stores/file_batch_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Optional +from typing_extensions import Required, TypedDict + +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileBatchCreateParams"] + + +class FileBatchCreateParams(TypedDict, total=False): + file_ids: Required[List[str]] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. 
+ """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/vector_stores/file_batch_list_files_params.py b/src/openai/types/vector_stores/file_batch_list_files_params.py new file mode 100644 index 0000000000..2a0a6c6aa7 --- /dev/null +++ b/src/openai/types/vector_stores/file_batch_list_files_params.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FileBatchListFilesParams"] + + +class FileBatchListFilesParams(TypedDict, total=False): + vector_store_id: Required[str] + + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + before: str + """A cursor for use in pagination. + + `before` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page + of the list. + """ + + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/src/openai/types/vector_stores/file_content_response.py b/src/openai/types/vector_stores/file_content_response.py new file mode 100644 index 0000000000..32db2f2ce9 --- /dev/null +++ b/src/openai/types/vector_stores/file_content_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["FileContentResponse"] + + +class FileContentResponse(BaseModel): + text: Optional[str] = None + """The text content""" + + type: Optional[str] = None + """The content type (currently only `"text"`)""" diff --git a/src/openai/types/vector_stores/file_create_params.py b/src/openai/types/vector_stores/file_create_params.py new file mode 100644 index 0000000000..5b8989251a --- /dev/null +++ b/src/openai/types/vector_stores/file_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/beta/threads/messages/file_list_params.py b/src/openai/types/vector_stores/file_list_params.py similarity index 71% rename from src/openai/types/beta/threads/messages/file_list_params.py rename to src/openai/types/vector_stores/file_list_params.py index 3640b8508b..867b5fb3bb 100644 --- a/src/openai/types/beta/threads/messages/file_list_params.py +++ b/src/openai/types/vector_stores/file_list_params.py @@ -1,15 +1,13 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["FileListParams"] class FileListParams(TypedDict, total=False): - thread_id: Required[str] - after: str """A cursor for use in pagination. @@ -23,11 +21,17 @@ class FileListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ + filter: Literal["in_progress", "completed", "failed", "cancelled"] + """Filter by file status. + + One of `in_progress`, `completed`, `failed`, `cancelled`. + """ + limit: int """A limit on the number of objects to be returned. diff --git a/src/openai/types/vector_stores/file_update_params.py b/src/openai/types/vector_stores/file_update_params.py new file mode 100644 index 0000000000..ebf540d046 --- /dev/null +++ b/src/openai/types/vector_stores/file_update_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +__all__ = ["FileUpdateParams"] + + +class FileUpdateParams(TypedDict, total=False): + vector_store_id: Required[str] + + attributes: Required[Optional[Dict[str, Union[str, float, bool]]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ diff --git a/src/openai/types/vector_stores/vector_store_file.py b/src/openai/types/vector_stores/vector_store_file.py new file mode 100644 index 0000000000..b59a61dfb0 --- /dev/null +++ b/src/openai/types/vector_stores/vector_store_file.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
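`FileUpdateParams` and the relocated `FileListParams` above are thin TypedDicts; a short sketch may help show the calls they are meant to back. This assumes the vector store files resource exposes `update` and `list` methods accepting these fields (the resource code is not part of this excerpt), and all IDs are placeholders.

# Hedged sketch: attach attributes to a vector store file, then list completed files.
from openai import OpenAI

client = OpenAI()

client.vector_stores.files.update(
    file_id="file-abc123",      # placeholder file ID
    vector_store_id="vs_123",   # placeholder vector store ID
    attributes={"author": "jane", "year": 2024, "internal": True},
)

for vs_file in client.vector_stores.files.list(
    vector_store_id="vs_123",
    filter="completed",  # one of in_progress, completed, failed, cancelled
    limit=20,
    order="desc",
):
    print(vs_file.id, vs_file.status)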
+ +from typing import Dict, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from ..file_chunking_strategy import FileChunkingStrategy + +__all__ = ["VectorStoreFile", "LastError"] + + +class LastError(BaseModel): + code: Literal["server_error", "unsupported_file", "invalid_file"] + """One of `server_error` or `rate_limit_exceeded`.""" + + message: str + """A human-readable description of the error.""" + + +class VectorStoreFile(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the vector store file was created.""" + + last_error: Optional[LastError] = None + """The last error associated with this vector store file. + + Will be `null` if there are no errors. + """ + + object: Literal["vector_store.file"] + """The object type, which is always `vector_store.file`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store file, which can be either `in_progress`, + `completed`, `cancelled`, or `failed`. The status `completed` indicates that the + vector store file is ready for use. + """ + + usage_bytes: int + """The total vector store usage in bytes. + + Note that this may be different from the original file size. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: Optional[FileChunkingStrategy] = None + """The strategy used to chunk the file.""" diff --git a/src/openai/types/vector_stores/vector_store_file_batch.py b/src/openai/types/vector_stores/vector_store_file_batch.py new file mode 100644 index 0000000000..57dbfbd809 --- /dev/null +++ b/src/openai/types/vector_stores/vector_store_file_batch.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStoreFileBatch", "FileCounts"] + + +class FileCounts(BaseModel): + cancelled: int + """The number of files that where cancelled.""" + + completed: int + """The number of files that have been processed.""" + + failed: int + """The number of files that have failed to process.""" + + in_progress: int + """The number of files that are currently being processed.""" + + total: int + """The total number of files.""" + + +class VectorStoreFileBatch(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """ + The Unix timestamp (in seconds) for when the vector store files batch was + created. 
+ """ + + file_counts: FileCounts + + object: Literal["vector_store.files_batch"] + """The object type, which is always `vector_store.file_batch`.""" + + status: Literal["in_progress", "completed", "cancelled", "failed"] + """ + The status of the vector store files batch, which can be either `in_progress`, + `completed`, `cancelled` or `failed`. + """ + + vector_store_id: str + """ + The ID of the + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + that the [File](https://platform.openai.com/docs/api-reference/files) is + attached to. + """ diff --git a/src/openai/types/vector_stores/vector_store_file_deleted.py b/src/openai/types/vector_stores/vector_store_file_deleted.py new file mode 100644 index 0000000000..5c856f26cd --- /dev/null +++ b/src/openai/types/vector_stores/vector_store_file_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["VectorStoreFileDeleted"] + + +class VectorStoreFileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["vector_store.file.deleted"] diff --git a/src/openai/types/websocket_connection_options.py b/src/openai/types/websocket_connection_options.py new file mode 100644 index 0000000000..40fd24ab03 --- /dev/null +++ b/src/openai/types/websocket_connection_options.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing_extensions import Sequence, TypedDict + +if TYPE_CHECKING: + from websockets import Subprotocol + from websockets.extensions import ClientExtensionFactory + + +class WebsocketConnectionOptions(TypedDict, total=False): + """Websocket connection options copied from `websockets`. + + For example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect + """ + + extensions: Sequence[ClientExtensionFactory] | None + """List of supported extensions, in order in which they should be negotiated and run.""" + + subprotocols: Sequence[Subprotocol] | None + """List of supported subprotocols, in order of decreasing preference.""" + + compression: str | None + """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details.""" + + # limits + max_size: int | None + """Maximum size of incoming messages in bytes. None disables the limit.""" + + max_queue: int | None | tuple[int | None, int | None] + """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea.""" + + write_limit: int | tuple[int, int | None] + """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. You may pass a (high, low) tuple to set the high-water and low-water marks.""" diff --git a/tests/__init__.py b/tests/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
diff --git a/tests/api_resources/__init__.py b/tests/api_resources/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/__init__.py +++ b/tests/api_resources/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/audio/__init__.py b/tests/api_resources/audio/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/audio/__init__.py +++ b/tests/api_resources/audio/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/audio/test_speech.py b/tests/api_resources/audio/test_speech.py index 50b00b73b4..ce9ed59ce3 100644 --- a/tests/api_resources/audio/test_speech.py +++ b/tests/api_resources/audio/test_speech.py @@ -1,27 +1,26 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import httpx import pytest from respx import MockRouter +import openai._legacy_response as _legacy_response from openai import OpenAI, AsyncOpenAI -from openai._types import BinaryResponseContent -from openai._client import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type + +# pyright: reportDeprecated=false base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestSpeech: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: @@ -29,88 +28,119 @@ def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: speech = client.audio.speech.create( input="string", model="string", - voice="alloy", + voice="ash", ) - assert isinstance(speech, BinaryResponseContent) + assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - speech = respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - client.audio.speech.create( + speech = client.audio.speech.create( input="string", model="string", - voice="alloy", + voice="ash", + instructions="instructions", response_format="mp3", speed=0.25, ) - assert isinstance(speech, BinaryResponseContent) + assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: 
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + response = client.audio.speech.with_raw_response.create( input="string", model="string", - voice="alloy", + voice="ash", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" speech = response.parse() - assert isinstance(speech, BinaryResponseContent) - assert speech.json() == {"foo": "bar"} + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, speech, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + with client.audio.speech.with_streaming_response.create( + input="string", + model="string", + voice="ash", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + speech = response.parse() + assert_matches_type(bytes, speech, path=["response"]) + + assert cast(Any, response.is_closed) is True class TestAsyncSpeech: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) - async def test_method_create(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None: + async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - speech = await client.audio.speech.create( + speech = await async_client.audio.speech.create( input="string", model="string", - voice="alloy", + voice="ash", ) - assert isinstance(speech, BinaryResponseContent) + assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) - async def test_method_create_with_all_params(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None: + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - speech = respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - await client.audio.speech.create( + speech = await async_client.audio.speech.create( input="string", model="string", - voice="alloy", + voice="ash", + instructions="instructions", response_format="mp3", speed=0.25, ) - assert isinstance(speech, BinaryResponseContent) + assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} - @pytest.mark.skip(reason="Mocked tests are currently broken") @parametrize @pytest.mark.respx(base_url=base_url) - async def test_raw_response_create(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None: + async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: 
respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await client.audio.speech.with_raw_response.create( + + response = await async_client.audio.speech.with_raw_response.create( input="string", model="string", - voice="alloy", + voice="ash", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" speech = response.parse() - assert isinstance(speech, BinaryResponseContent) - assert speech.json() == {"foo": "bar"} + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, speech, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_streaming_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + async with async_client.audio.speech.with_streaming_response.create( + input="string", + model="string", + voice="ash", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + speech = await response.parse() + assert_matches_type(bytes, speech, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/audio/test_transcriptions.py b/tests/api_resources/audio/test_transcriptions.py index aefdf1790f..19215e11df 100644 --- a/tests/api_resources/audio/test_transcriptions.py +++ b/tests/api_resources/audio/test_transcriptions.py @@ -1,87 +1,222 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.types.audio import Transcription +from openai.types.audio import TranscriptionCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestTranscriptions: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - def test_method_create(self, client: OpenAI) -> None: + def test_method_create_overload_1(self, client: OpenAI) -> None: transcription = client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: transcription = client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", - language="string", - prompt="string", + model="gpt-4o-transcribe", + include=["logprobs"], + language="language", + prompt="prompt", response_format="json", + stream=False, temperature=0, + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + 
assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: response = client.audio.transcriptions.with_raw_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + transcription = response.parse() + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + transcription_stream = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + transcription_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + transcription_stream = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + include=["logprobs"], + language="language", + prompt="prompt", + response_format="json", + temperature=0, + timestamp_granularities=["word"], + ) + transcription_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.audio.transcriptions.with_raw_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True class TestAsyncTranscriptions: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - transcription = await client.audio.transcriptions.create( + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + transcription = await async_client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) - assert_matches_type(Transcription, 
transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - transcription = await client.audio.transcriptions.create( + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + transcription = await async_client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", - language="string", - prompt="string", + model="gpt-4o-transcribe", + include=["logprobs"], + language="language", + prompt="prompt", response_format="json", + stream=False, temperature=0, + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.audio.transcriptions.with_raw_response.create( + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.audio.transcriptions.with_raw_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + transcription = await response.parse() + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + transcription_stream = await async_client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + await transcription_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + transcription_stream = await async_client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + include=["logprobs"], + language="language", + prompt="prompt", + response_format="json", + temperature=0, + timestamp_granularities=["word"], + ) + await transcription_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.audio.transcriptions.with_raw_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) as 
response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/audio/test_translations.py b/tests/api_resources/audio/test_translations.py index 0657e80eb8..e12ab7e6c0 100644 --- a/tests/api_resources/audio/test_translations.py +++ b/tests/api_resources/audio/test_translations.py @@ -1,24 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.types.audio import Translation +from openai.types.audio import TranslationCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestTranslations: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: @@ -26,18 +23,18 @@ def test_method_create(self, client: OpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: translation = client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: @@ -45,41 +42,71 @@ def test_raw_response_create(self, client: OpenAI) -> None: file=b"raw file contents", model="whisper-1", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.audio.translations.with_streaming_response.create( + file=b"raw file contents", + model="whisper-1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + translation = response.parse() + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) + + assert cast(Any, response.is_closed) is True class TestAsyncTranslations: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], 
ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - translation = await client.audio.translations.create( + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + translation = await async_client.audio.translations.create( file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - translation = await client.audio.translations.create( + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + translation = await async_client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.audio.translations.with_raw_response.create( + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.audio.translations.with_raw_response.create( file=b"raw file contents", model="whisper-1", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.audio.translations.with_streaming_response.create( + file=b"raw file contents", + model="whisper-1", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + translation = await response.parse() + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/__init__.py +++ b/tests/api_resources/beta/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/assistants/__init__.py b/tests/api_resources/beta/assistants/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/assistants/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/assistants/test_files.py b/tests/api_resources/beta/assistants/test_files.py deleted file mode 100644 index 2545640c57..0000000000 --- a/tests/api_resources/beta/assistants/test_files.py +++ /dev/null @@ -1,190 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -import os - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.assistants import AssistantFile, FileDeleteResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" - - -class TestFiles: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - def test_method_create(self, client: OpenAI) -> None: - file = client.beta.assistants.files.create( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - file_id="string", - ) - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.create( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - file_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.assistants.files.retrieve( - "string", - assistant_id="string", - ) - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - def test_method_list(self, client: OpenAI) -> None: - file = client.beta.assistants.files.list( - "string", - ) - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.assistants.files.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.list( - "string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - def test_method_delete(self, client: OpenAI) -> None: - file = client.beta.assistants.files.delete( - "string", - assistant_id="string", - ) - assert_matches_type(FileDeleteResponse, file, path=["response"]) - - @parametrize - def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.assistants.files.with_raw_response.delete( - "string", - assistant_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) - - -class TestAsyncFiles: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, 
api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - file = await client.beta.assistants.files.create( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - file_id="string", - ) - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.files.with_raw_response.create( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - file_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - file = await client.beta.assistants.files.retrieve( - "string", - assistant_id="string", - ) - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.files.with_raw_response.retrieve( - "string", - assistant_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AssistantFile, file, path=["response"]) - - @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - file = await client.beta.assistants.files.list( - "string", - ) - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - file = await client.beta.assistants.files.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.files.with_raw_response.list( - "string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"]) - - @parametrize - async def test_method_delete(self, client: AsyncOpenAI) -> None: - file = await client.beta.assistants.files.delete( - "string", - assistant_id="string", - ) - assert_matches_type(FileDeleteResponse, file, path=["response"]) - - @parametrize - async def test_raw_response_delete(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.files.with_raw_response.delete( - "string", - assistant_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(FileDeleteResponse, file, path=["response"]) diff --git a/tests/api_resources/beta/chat/__init__.py b/tests/api_resources/beta/chat/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/chat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/realtime/__init__.py b/tests/api_resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/beta/realtime/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
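The rewritten test modules in this diff drop the inline strict/loose client construction in favor of `pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])`, which delegates client creation to fixtures. The repository's conftest is not part of this excerpt, so the sketch below is a hypothetical reconstruction of how such `client` / `async_client` fixtures could interpret the boolean as the strict-validation flag.

# Hypothetical conftest.py sketch; fixture details are assumptions, not the repo's actual code.
import pytest

from openai import OpenAI, AsyncOpenAI

base_url = "http://127.0.0.1:4010/"
api_key = "My API Key"


@pytest.fixture
def client(request: pytest.FixtureRequest) -> OpenAI:
    # request.param is the False/True value from the indirect parametrization:
    # False -> loose (no response validation), True -> strict.
    return OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=request.param)


@pytest.fixture
async def async_client(request: pytest.FixtureRequest) -> AsyncOpenAI:
    return AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=request.param)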
diff --git a/tests/api_resources/beta/realtime/test_sessions.py b/tests/api_resources/beta/realtime/test_sessions.py new file mode 100644 index 0000000000..f432b7d277 --- /dev/null +++ b/tests/api_resources/beta/realtime/test_sessions.py @@ -0,0 +1,148 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.realtime import SessionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + input_audio_format="pcm16", + input_audio_noise_reduction={"type": "near_field"}, + input_audio_transcription={ + "language": "language", + "model": "model", + "prompt": "prompt", + }, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + model="gpt-4o-realtime-preview", + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "eagerness": "low", + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "server_vad", + }, + voice="ash", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.realtime.sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.realtime.sessions.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + input_audio_format="pcm16", + input_audio_noise_reduction={"type": "near_field"}, + input_audio_transcription={ + "language": "language", + "model": "model", + "prompt": "prompt", + }, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + 
model="gpt-4o-realtime-preview", + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "eagerness": "low", + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "server_vad", + }, + voice="ash", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.realtime.sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.realtime.sessions.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/realtime/test_transcription_sessions.py b/tests/api_resources/beta/realtime/test_transcription_sessions.py new file mode 100644 index 0000000000..4826185bea --- /dev/null +++ b/tests/api_resources/beta/realtime/test_transcription_sessions.py @@ -0,0 +1,120 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.realtime import TranscriptionSession + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestTranscriptionSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + transcription_session = client.beta.realtime.transcription_sessions.create() + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + transcription_session = client.beta.realtime.transcription_sessions.create( + include=["string"], + input_audio_format="pcm16", + input_audio_noise_reduction={"type": "near_field"}, + input_audio_transcription={ + "language": "language", + "model": "gpt-4o-transcribe", + "prompt": "prompt", + }, + modalities=["text"], + turn_detection={ + "create_response": True, + "eagerness": "low", + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "server_vad", + }, + ) + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.realtime.transcription_sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + transcription_session = response.parse() + assert_matches_type(TranscriptionSession, transcription_session, 
path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.realtime.transcription_sessions.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + transcription_session = response.parse() + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncTranscriptionSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + transcription_session = await async_client.beta.realtime.transcription_sessions.create() + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + transcription_session = await async_client.beta.realtime.transcription_sessions.create( + include=["string"], + input_audio_format="pcm16", + input_audio_noise_reduction={"type": "near_field"}, + input_audio_transcription={ + "language": "language", + "model": "gpt-4o-transcribe", + "prompt": "prompt", + }, + modalities=["text"], + turn_detection={ + "create_response": True, + "eagerness": "low", + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "server_vad", + }, + ) + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.realtime.transcription_sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + transcription_session = response.parse() + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.realtime.transcription_sessions.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + transcription_session = await response.parse() + assert_matches_type(TranscriptionSession, transcription_session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py index 82e975b46d..82aaf87b1c 100644 --- a/tests/api_resources/beta/test_assistants.py +++ b/tests/api_resources/beta/test_assistants.py @@ -1,101 +1,183 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta import Assistant, AssistantDeleted +from openai.types.beta import ( + Assistant, + AssistantDeleted, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestAssistants: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="string", + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="string", - description="string", - file_ids=["string", "string", "string"], - instructions="string", - metadata={}, - name="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.create( - model="string", + model="gpt-4o", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.assistants.with_streaming_response.create( + model="gpt-4o", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: assistant = client.beta.assistants.retrieve( - "string", + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.retrieve( - "string", + "assistant_id", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: 
+ with client.beta.assistants.with_streaming_response.retrieve( + "assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): + client.beta.assistants.with_raw_response.retrieve( + "", + ) + @parametrize def test_method_update(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", - description="string", - file_ids=["string", "string", "string"], - instructions="string", - metadata={}, + assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + name="name", + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.update( - "string", + assistant_id="assistant_id", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.beta.assistants.with_streaming_response.update( + assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): + client.beta.assistants.with_raw_response.update( + assistant_id="", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: assistant = client.beta.assistants.list() @@ -104,8 +186,8 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.list( - after="string", - before="string", + after="after", + before="before", limit=0, order="asc", ) @@ -114,141 +196,291 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(SyncCursorPage[Assistant], assistant, 
path=["response"]) + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.beta.assistants.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = response.parse() + assert_matches_type(SyncCursorPage[Assistant], assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_delete(self, client: OpenAI) -> None: assistant = client.beta.assistants.delete( - "string", + "assistant_id", ) assert_matches_type(AssistantDeleted, assistant, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.delete( - "string", + "assistant_id", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(AssistantDeleted, assistant, path=["response"]) + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.assistants.with_streaming_response.delete( + "assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = response.parse() + assert_matches_type(AssistantDeleted, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): + client.beta.assistants.with_raw_response.delete( + "", + ) + class TestAsyncAssistants: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.create( - model="string", + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.create( + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.create( - model="string", - description="string", - file_ids=["string", "string", "string"], - instructions="string", - metadata={}, - name="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.create( + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + tools=[{"type": "code_interpreter"}], + 
top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.with_raw_response.create( - model="string", + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.assistants.with_raw_response.create( + model="gpt-4o", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.retrieve( - "string", + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.assistants.with_streaming_response.create( + model="gpt-4o", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = await response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.retrieve( + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.with_raw_response.retrieve( - "string", + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.assistants.with_raw_response.retrieve( + "assistant_id", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_method_update(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.update( - "string", + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.assistants.with_streaming_response.retrieve( + "assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = await response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): + await async_client.beta.assistants.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.update( + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_method_update_with_all_params(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.update( - "string", - description="string", - file_ids=["string", "string", "string"], - instructions="string", - metadata={}, + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.update( + 
assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + name="name", + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_raw_response_update(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.with_raw_response.update( - "string", + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.assistants.with_raw_response.update( + assistant_id="assistant_id", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(Assistant, assistant, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.list() + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.assistants.with_streaming_response.update( + assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + assistant = await response.parse() + assert_matches_type(Assistant, assistant, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): + await async_client.beta.assistants.with_raw_response.update( + assistant_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.list() assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"]) @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.list( - after="string", - before="string", + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + assistant = await async_client.beta.assistants.list( + after="after", + before="before", limit=0, order="asc", ) assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.assistants.with_raw_response.list() + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.assistants.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" assistant = response.parse() assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"]) @parametrize - async def test_method_delete(self, client: AsyncOpenAI) -> None: - assistant = await client.beta.assistants.delete( - "string", + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.assistants.with_streaming_response.list() as response: + assert not response.is_closed + 
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+        assistant = await async_client.beta.assistants.delete(
+            "assistant_id",
         )
         assert_matches_type(AssistantDeleted, assistant, path=["response"])

     @parametrize
-    async def test_raw_response_delete(self, client: AsyncOpenAI) -> None:
-        response = await client.beta.assistants.with_raw_response.delete(
-            "string",
+    async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.beta.assistants.with_raw_response.delete(
+            "assistant_id",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(AssistantDeleted, assistant, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.beta.assistants.with_streaming_response.delete(
+            "assistant_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(AssistantDeleted, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"):
+            await async_client.beta.assistants.with_raw_response.delete(
+                "",
+            )
diff --git a/tests/api_resources/beta/test_realtime.py b/tests/api_resources/beta/test_realtime.py
new file mode 100644
index 0000000000..537017ffd3
--- /dev/null
+++ b/tests/api_resources/beta/test_realtime.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestRealtime:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+
+class TestAsyncRealtime:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index 8fa1fc20ea..9916d5bdc6 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -1,25 +1,25 @@
-# File generated from our OpenAPI spec by Stainless.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.types.beta import Thread, ThreadDeleted +from openai.types.beta import ( + Thread, + ThreadDeleted, +) from openai.types.beta.threads import Run base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestThreads: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: @@ -31,35 +31,54 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: thread = client.beta.threads.create( messages=[ { + "content": "string", "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } ], - metadata={}, + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, ) assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.create() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: thread = client.beta.threads.retrieve( @@ -72,10 +91,32 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.retrieve( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, 
client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.retrieve( + "", + ) + @parametrize def test_method_update(self, client: OpenAI) -> None: thread = client.beta.threads.update( @@ -86,8 +127,12 @@ def test_method_update(self, client: OpenAI) -> None: @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: thread = client.beta.threads.update( - "string", - metadata={}, + thread_id="thread_id", + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, ) assert_matches_type(Thread, thread, path=["response"]) @@ -96,10 +141,32 @@ def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.update( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.update( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.update( + "", + ) + @parametrize def test_method_delete(self, client: OpenAI) -> None: thread = client.beta.threads.delete( @@ -112,207 +179,568 @@ def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.delete( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize - def test_method_create_and_run(self, client: OpenAI) -> None: + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(ThreadDeleted, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.delete( + "", + ) + + @parametrize + def test_method_create_and_run_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( assistant_id="string", ) assert_matches_type(Run, thread, path=["response"]) @parametrize - def test_method_create_and_run_with_all_params(self, client: OpenAI) -> None: + def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( assistant_id="string", instructions="string", - metadata={}, + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, model="string", + 
parallel_tool_calls=True, + response_format="auto", + stream=False, + temperature=1, thread={ "messages": [ { + "content": "string", "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } ], - "metadata": {}, + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], ) assert_matches_type(Run, thread, path=["response"]) @parametrize - def test_raw_response_create_and_run(self, client: OpenAI) -> None: + def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.create_and_run( assistant_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Run, thread, path=["response"]) + @parametrize + def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(Run, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_and_run_overload_2(self, client: OpenAI) -> None: + thread_stream = client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + ) + thread_stream.response.close() + + @parametrize + def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) -> None: + thread_stream = client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + 
"last_messages": 1, + }, + ) + thread_stream.response.close() + + @parametrize + def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.with_raw_response.create_and_run( + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_and_run_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + class TestAsyncThreads: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.create() + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.create() assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.create( + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.create( messages=[ { + "content": "string", "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } ], - metadata={}, + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, ) assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.with_raw_response.create() + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.create() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.retrieve( + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + + thread = await response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.retrieve( "string", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.with_raw_response.retrieve( + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.retrieve( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_method_update(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.update( + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.update( "string", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_method_update_with_all_params(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.update( - "string", - metadata={}, + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.update( + thread_id="thread_id", + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, ) assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_raw_response_update(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.with_raw_response.update( + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.update( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Thread, thread, path=["response"]) @parametrize - async def test_method_delete(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.delete( + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.update( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(Thread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def 
test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.update( + "", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.delete( "string", ) assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize - async def test_raw_response_delete(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.with_raw_response.delete( + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.delete( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize - async def test_method_create_and_run(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.create_and_run( + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(ThreadDeleted, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.delete( + "", + ) + + @parametrize + async def test_method_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.create_and_run( assistant_id="string", ) assert_matches_type(Run, thread, path=["response"]) @parametrize - async def test_method_create_and_run_with_all_params(self, client: AsyncOpenAI) -> None: - thread = await client.beta.threads.create_and_run( + async def test_method_create_and_run_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.threads.create_and_run( assistant_id="string", instructions="string", - metadata={}, + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, model="string", + parallel_tool_calls=True, + response_format="auto", + stream=False, + temperature=1, thread={ "messages": [ { + "content": "string", "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, - { - "role": "user", - "content": "x", - "file_ids": ["string"], - "metadata": {}, - }, + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } ], - "metadata": {}, + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + 
tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], ) assert_matches_type(Run, thread, path=["response"]) @parametrize - async def test_raw_response_create_and_run(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.with_raw_response.create_and_run( + async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.create_and_run( assistant_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" thread = response.parse() assert_matches_type(Run, thread, path=["response"]) + + @parametrize + async def test_streaming_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(Run, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + ) + await thread_stream.response.aclose() + + @parametrize + async def test_method_create_and_run_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="string", + stream=True, + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, + }, + }, + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + await thread_stream.response.aclose() + + @parametrize + async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.with_raw_response.create_and_run( + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.with_streaming_response.create_and_run( + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/threads/__init__.py b/tests/api_resources/beta/threads/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/threads/__init__.py +++ b/tests/api_resources/beta/threads/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/threads/messages/__init__.py b/tests/api_resources/beta/threads/messages/__init__.py deleted file mode 100644 index 1016754ef3..0000000000 --- a/tests/api_resources/beta/threads/messages/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. diff --git a/tests/api_resources/beta/threads/messages/test_files.py b/tests/api_resources/beta/threads/messages/test_files.py deleted file mode 100644 index a5b68713e6..0000000000 --- a/tests/api_resources/beta/threads/messages/test_files.py +++ /dev/null @@ -1,128 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. - -from __future__ import annotations - -import os - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.threads.messages import MessageFile - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" - - -class TestFiles: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.retrieve( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F", - message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.messages.files.with_raw_response.retrieve( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F", - message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - def test_method_list(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.list( - "string", - thread_id="string", - ) - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.threads.messages.files.list( - "string", - thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - 
assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"]) - - -class TestAsyncFiles: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - file = await client.beta.threads.messages.files.retrieve( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F", - message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.files.with_raw_response.retrieve( - "file-AF1WoRqd3aJAHsqc9NY7iL8F", - thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F", - message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(MessageFile, file, path=["response"]) - - @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - file = await client.beta.threads.messages.files.list( - "string", - thread_id="string", - ) - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - file = await client.beta.threads.messages.files.list( - "string", - thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) - - @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.files.with_raw_response.list( - "string", - thread_id="string", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"]) diff --git a/tests/api_resources/beta/threads/runs/__init__.py b/tests/api_resources/beta/threads/runs/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/beta/threads/runs/__init__.py +++ b/tests/api_resources/beta/threads/runs/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/threads/runs/test_steps.py b/tests/api_resources/beta/threads/runs/test_steps.py index 3f4f8c1022..f5dc17e0b5 100644 --- a/tests/api_resources/beta/threads/runs/test_steps.py +++ b/tests/api_resources/beta/threads/runs/test_steps.py @@ -1,25 +1,22 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncCursorPage, AsyncCursorPage from openai.types.beta.threads.runs import RunStep base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestSteps: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: @@ -30,6 +27,16 @@ def test_method_retrieve(self, client: OpenAI) -> None: ) assert_matches_type(RunStep, step, path=["response"]) + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + step = client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + ) + assert_matches_type(RunStep, step, path=["response"]) + @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.runs.steps.with_raw_response.retrieve( @@ -37,10 +44,50 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: thread_id="string", run_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" step = response.parse() assert_matches_type(RunStep, step, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.threads.runs.steps.with_streaming_response.retrieve( + "string", + thread_id="string", + run_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = response.parse() + assert_matches_type(RunStep, step, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + "string", + thread_id="", + run_id="string", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + "string", + thread_id="string", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + "", + thread_id="string", + run_id="string", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: step = client.beta.threads.runs.steps.list( @@ -52,10 +99,11 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: step = client.beta.threads.runs.steps.list( - "string", - thread_id="string", - after="string", - before="string", + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + 
include=["step_details.tool_calls[*].file_search.results[*].content"], limit=0, order="asc", ) @@ -67,19 +115,47 @@ def test_raw_response_list(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" step = response.parse() assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.beta.threads.runs.steps.with_streaming_response.list( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = response.parse() + assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.list( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.list( + "", + thread_id="string", + ) + class TestAsyncSteps: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - step = await client.beta.threads.runs.steps.retrieve( + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + step = await async_client.beta.threads.runs.steps.retrieve( "string", thread_id="string", run_id="string", @@ -87,42 +163,123 @@ async def test_method_retrieve(self, client: AsyncOpenAI) -> None: assert_matches_type(RunStep, step, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.steps.with_raw_response.retrieve( + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + step = await async_client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + ) + assert_matches_type(RunStep, step, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.steps.with_raw_response.retrieve( "string", thread_id="string", run_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" step = response.parse() assert_matches_type(RunStep, step, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - step = await client.beta.threads.runs.steps.list( + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.steps.with_streaming_response.retrieve( "string", thread_id="string", - ) - assert_matches_type(AsyncCursorPage[RunStep], 
step, path=["response"]) + run_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = await response.parse() + assert_matches_type(RunStep, step, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + "string", + thread_id="", + run_id="string", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + "string", + thread_id="string", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + "", + thread_id="string", + run_id="string", + ) @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - step = await client.beta.threads.runs.steps.list( + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + step = await async_client.beta.threads.runs.steps.list( "string", thread_id="string", - after="string", - before="string", + ) + assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + step = await async_client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + include=["step_details.tool_calls[*].file_search.results[*].content"], limit=0, order="asc", ) assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.steps.with_raw_response.list( + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.steps.with_raw_response.list( "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" step = response.parse() assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.steps.with_streaming_response.list( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = await response.parse() + assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.list( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.list( + "", + thread_id="string", + ) diff --git a/tests/api_resources/beta/threads/test_messages.py 
b/tests/api_resources/beta/threads/test_messages.py index f3fe7dc2bb..9189a2f29e 100644 --- a/tests/api_resources/beta/threads/test_messages.py +++ b/tests/api_resources/beta/threads/test_messages.py @@ -1,56 +1,87 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.threads import ThreadMessage +from openai.types.beta.threads import ( + Message, + MessageDeleted, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestMessages: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", - file_ids=["string"], - metadata={}, + attachments=[ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + metadata={"foo": "string"}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.create( "string", - content="x", + content="string", role="user", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.create( + "string", + content="string", + role="user", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.create( + "", + content="string", + role="user", + ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: @@ -58,7 +89,7 @@ def test_method_retrieve(self, client: OpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, 
message, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: @@ -66,9 +97,39 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.retrieve( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.retrieve( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.retrieve( + "", + thread_id="string", + ) @parametrize def test_method_update(self, client: OpenAI) -> None: @@ -76,16 +137,16 @@ def test_method_update(self, client: OpenAI) -> None: "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: message = client.beta.threads.messages.update( - "string", - thread_id="string", - metadata={}, + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: @@ -93,16 +154,46 @@ def test_raw_response_update(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.update( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.update( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.update( + "", + thread_id="string", + ) @parametrize def test_method_list(self, client: OpenAI) -> None: message = 
client.beta.threads.messages.list( "string", ) - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: @@ -112,123 +203,354 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: before="string", limit=0, order="asc", + run_id="string", ) - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.list( "string", ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.list( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.list( + "", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + message = client.beta.threads.messages.delete( + "string", + thread_id="string", + ) + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="string", + ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.threads.messages.with_streaming_response.delete( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + "", + thread_id="string", + ) class TestAsyncMessages: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, 
loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.create( + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.create( + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.create( "string", - content="x", + content="string", role="user", - file_ids=["string"], - metadata={}, + attachments=[ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + metadata={"foo": "string"}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.with_raw_response.create( + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.create( "string", - content="x", + content="string", role="user", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.retrieve( + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.create( + "string", + content="string", + role="user", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.create( + "", + content="string", + role="user", + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.retrieve( "string", thread_id="string", ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.with_raw_response.retrieve( + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.retrieve( "string", thread_id="string", ) + + assert response.is_closed is True assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_method_update(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.update( + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.retrieve( "string", thread_id="string", - ) - assert_matches_type(ThreadMessage, message, path=["response"]) + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.retrieve( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.retrieve( + "", + thread_id="string", + ) @parametrize - async def test_method_update_with_all_params(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.update( + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.update( "string", thread_id="string", - metadata={}, ) - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.update( + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Message, message, path=["response"]) @parametrize - async def test_raw_response_update(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.with_raw_response.update( + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.update( "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(ThreadMessage, message, path=["response"]) + assert_matches_type(Message, message, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.update( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.update( + "string", + 
thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.update( + "", + thread_id="string", + ) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.list( + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.list( "string", ) - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - message = await client.beta.threads.messages.list( + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.list( "string", after="string", before="string", limit=0, order="asc", + run_id="string", ) - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.messages.with_raw_response.list( + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.list( "string", ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.list( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.list( + "", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + message = await async_client.beta.threads.messages.delete( + "string", + thread_id="string", + ) + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="string", + ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" message = response.parse() - assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"]) + assert_matches_type(MessageDeleted, message, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.messages.with_streaming_response.delete( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + "", + thread_id="string", + ) diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py index d323dfc354..4230ccebe4 100644 --- a/tests/api_resources/beta/threads/test_runs.py +++ b/tests/api_resources/beta/threads/test_runs.py @@ -1,28 +1,29 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.threads import Run +from openai.types.beta.threads import ( + Run, +) + +# pyright: reportDeprecated=false base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestRuns: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - def test_method_create(self, client: OpenAI) -> None: + def test_method_create_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( "string", assistant_id="string", @@ -30,27 +31,164 @@ def test_method_create(self, client: OpenAI) -> None: assert_matches_type(Run, run, path=["response"]) @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], instructions="string", - metadata={}, + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, model="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", + stream=False, + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, ) assert_matches_type(Run, 
run, path=["response"]) @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.create( "string", assistant_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create_overload_1(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + ) + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + ) + run_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + run_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.runs.with_raw_response.create( + "string", + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create_overload_2(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + stream=True, + ) + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: run = client.beta.threads.runs.retrieve( @@ -65,10 +203,40 @@ def 
test_raw_response_retrieve(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.retrieve( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.retrieve( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.retrieve( + "", + thread_id="string", + ) + @parametrize def test_method_update(self, client: OpenAI) -> None: run = client.beta.threads.runs.update( @@ -80,9 +248,9 @@ def test_method_update(self, client: OpenAI) -> None: @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: run = client.beta.threads.runs.update( - "string", - thread_id="string", - metadata={}, + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) assert_matches_type(Run, run, path=["response"]) @@ -92,10 +260,40 @@ def test_raw_response_update(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.update( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.update( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.update( + "", + thread_id="string", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: run = client.beta.threads.runs.list( @@ -119,10 +317,32 @@ def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.list( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(SyncCursorPage[Run], run, path=["response"]) + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.list( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + + run = response.parse() + assert_matches_type(SyncCursorPage[Run], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.list( + "", + ) + @parametrize def test_method_cancel(self, client: OpenAI) -> None: run = client.beta.threads.runs.cancel( @@ -137,121 +357,451 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) @parametrize - def test_method_submit_tool_outputs(self, client: OpenAI) -> None: - run = client.beta.threads.runs.submit_tool_outputs( + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.cancel( "string", thread_id="string", - tool_outputs=[{}, {}, {}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.cancel( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.cancel( + "", + thread_id="string", + ) + + @parametrize + def test_method_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: + run = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert_matches_type(Run, run, path=["response"]) @parametrize - def test_raw_response_submit_tool_outputs(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: OpenAI) -> None: + run = client.beta.threads.runs.submit_tool_outputs( "string", thread_id="string", - tool_outputs=[{}, {}, {}], + tool_outputs=[ + { + "output": "output", + "tool_call_id": "tool_call_id", + } + ], + stream=False, + ) + assert_matches_type(Run, run, path=["response"]) + + @parametrize + def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: + response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) + @parametrize + def test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is 
True + + @parametrize + def test_path_params_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + tool_outputs=[{}], + ) + + @parametrize + def test_method_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + run_stream = client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) + run_stream.response.close() + + @parametrize + def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + stream=True, + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) + class TestAsyncRuns: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.create( + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.create( "string", assistant_id="string", ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.create( - "string", - assistant_id="string", + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + 
additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], instructions="string", - metadata={}, + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, model="string", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", + stream=False, + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.create( + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.create( "string", assistant_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.retrieve( + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create_overload_1(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + ) + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + ) + await run_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.create( + "string", + assistant_id="string", + stream=True, + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="string", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + await run_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await 
async_client.beta.threads.runs.with_raw_response.create( + "string", + assistant_id="string", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.create( + "string", + assistant_id="string", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create_overload_2(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.create( + "", + assistant_id="string", + stream=True, + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.retrieve( "string", thread_id="string", ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.retrieve( + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.retrieve( "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_update(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.update( + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.retrieve( "string", thread_id="string", - ) - assert_matches_type(Run, run, path=["response"]) + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True @parametrize - async def test_method_update_with_all_params(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.update( + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.retrieve( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.retrieve( + "", + thread_id="string", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.update( "string", thread_id="string", - metadata={}, ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_update(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.update( + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = 
await async_client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Run, run, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.update( "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.list( + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.update( + "string", + thread_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.update( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.update( + "", + thread_id="string", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.list( "string", ) assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.list( + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.list( "string", after="string", before="string", @@ -261,48 +811,205 @@ async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.list( + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.list( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize - async def test_method_cancel(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.cancel( + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.list( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, 
match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.list( + "", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.cancel( "string", thread_id="string", ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_cancel(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.cancel( + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.cancel( "string", thread_id="string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_method_submit_tool_outputs(self, client: AsyncOpenAI) -> None: - run = await client.beta.threads.runs.submit_tool_outputs( + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.cancel( "string", thread_id="string", - tool_outputs=[{}, {}, {}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.cancel( + "string", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.cancel( + "", + thread_id="string", + ) + + @parametrize + async def test_method_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert_matches_type(Run, run, path=["response"]) @parametrize - async def test_raw_response_submit_tool_outputs(self, client: AsyncOpenAI) -> None: - response = await client.beta.threads.runs.with_raw_response.submit_tool_outputs( + async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + run = await async_client.beta.threads.runs.submit_tool_outputs( "string", thread_id="string", - tool_outputs=[{}, {}, {}], + tool_outputs=[ + { + "output": "output", + "tool_call_id": "tool_call_id", + } + ], + stream=False, + ) + assert_matches_type(Run, run, path=["response"]) + + @parametrize + async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" run = response.parse() assert_matches_type(Run, run, path=["response"]) + + @parametrize + async def test_streaming_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + tool_outputs=[{}], + ) + + @parametrize + async def test_method_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + run_stream = await async_client.beta.threads.runs.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) + await run_stream.response.aclose() + + @parametrize + async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + "string", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "string", + thread_id="", + stream=True, + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + "", + thread_id="string", + stream=True, + tool_outputs=[{}], + ) diff --git a/tests/api_resources/chat/__init__.py b/tests/api_resources/chat/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/chat/__init__.py +++ b/tests/api_resources/chat/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/chat/completions/__init__.py b/tests/api_resources/chat/completions/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/chat/completions/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
diff --git a/tests/api_resources/chat/completions/test_messages.py b/tests/api_resources/chat/completions/test_messages.py new file mode 100644 index 0000000000..5caac9ec6c --- /dev/null +++ b/tests/api_resources/chat/completions/test_messages.py @@ -0,0 +1,119 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.chat import ChatCompletionStoreMessage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestMessages: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.messages.with_raw_response.list( + completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) + + +class TestAsyncMessages: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + completion_id="completion_id", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.messages.with_raw_response.list( + 
completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 132e00039b..aaef82e8c5 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -1,24 +1,26 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest +import pydantic from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI -from openai.types.chat import ChatCompletion +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.chat import ( + ChatCompletion, + ChatCompletionDeleted, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestCompletions: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create_overload_1(self, client: OpenAI) -> None: @@ -26,10 +28,10 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -39,57 +41,73 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", + "name": "name", } ], - model="gpt-3.5-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - 
response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], + top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -99,86 +117,125 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" completion = response.parse() assert_matches_type(ChatCompletion, completion, path=["response"]) + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_create_overload_2(self, client: OpenAI) -> None: - client.chat.completions.create( + completion_stream = client.chat.completions.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", stream=True, ) + completion_stream.response.close() @parametrize def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: - client.chat.completions.create( + completion_stream = client.chat.completions.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", + "name": "name", } ], - model="gpt-3.5-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", 
tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], + top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) + completion_stream.response.close() @parametrize def test_raw_response_create_overload_2(self, client: OpenAI) -> None: @@ -186,192 +243,631 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", stream=True, ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + completion = client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + completion = client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.update( + completion_id="", + metadata={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + completion = client.chat.completions.list() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + completion = client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + completion = client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + 
client.chat.completions.with_raw_response.delete( + "", + ) + + @parametrize + def test_method_create_disallows_pydantic(self, client: OpenAI) -> None: + class MyModel(pydantic.BaseModel): + a: str + + with pytest.raises(TypeError, match=r"You tried to pass a `BaseModel` class"): + client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="gpt-4o", + response_format=cast(Any, MyModel), + ) class TestAsyncCompletions: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create_overload_1(self, client: AsyncOpenAI) -> None: - completion = await client.chat.completions.create( + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize - async def test_method_create_with_all_params_overload_1(self, client: AsyncOpenAI) -> None: - completion = await client.chat.completions.create( + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", + "name": "name", } ], - model="gpt-3.5-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], + top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize - async def test_raw_response_create_overload_1(self, client: AsyncOpenAI) -> None: - 
response = await client.chat.completions.with_raw_response.create( + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" completion = response.parse() assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize - async def test_method_create_overload_2(self, client: AsyncOpenAI) -> None: - await client.chat.completions.create( + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + completion_stream = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", stream=True, ) + await completion_stream.response.aclose() @parametrize - async def test_method_create_with_all_params_overload_2(self, client: AsyncOpenAI) -> None: - await client.chat.completions.create( + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + completion_stream = await async_client.chat.completions.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", + "name": "name", } ], - model="gpt-3.5-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, + stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, + } + ], + top_logprobs=0, + top_p=1, + user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", }, + "type": "approximate", }, + }, + ) + await completion_stream.response.aclose() + + @parametrize + async def 
test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.create( + messages=[ { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + "content": "string", + "role": "developer", + } ], - top_p=1, - user="user-1234", + model="gpt-4o", + stream=True, ) + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + @parametrize - async def test_raw_response_create_overload_2(self, client: AsyncOpenAI) -> None: - response = await client.chat.completions.with_raw_response.create( + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.create( messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-3.5-turbo", + model="gpt-4o", stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.update( + completion_id="", + metadata={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await 
async_client.chat.completions.with_raw_response.delete( + "", + ) + + @parametrize + async def test_method_create_disallows_pydantic(self, async_client: AsyncOpenAI) -> None: + class MyModel(pydantic.BaseModel): + a: str + + with pytest.raises(TypeError, match=r"You tried to pass a `BaseModel` class"): + await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="gpt-4o", + response_format=cast(Any, MyModel), + ) diff --git a/tests/api_resources/evals/__init__.py b/tests/api_resources/evals/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/evals/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/evals/runs/__init__.py b/tests/api_resources/evals/runs/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/evals/runs/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/evals/runs/test_output_items.py b/tests/api_resources/evals/runs/test_output_items.py new file mode 100644 index 0000000000..f764f0336e --- /dev/null +++ b/tests/api_resources/evals/runs/test_output_items.py @@ -0,0 +1,263 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.evals.runs import OutputItemListResponse, OutputItemRetrieveResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestOutputItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.runs.output_items.with_streaming_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + 
eval_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="", + eval_id="eval_id", + run_id="run_id", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="fail", + ) + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.runs.output_items.with_streaming_response.list( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = response.parse() + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.output_items.with_raw_response.list( + run_id="", + eval_id="eval_id", + ) + + +class TestAsyncOutputItems: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + output_item = await async_client.evals.runs.output_items.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> 
None: + async with async_client.evals.runs.output_items.with_streaming_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = await response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="", + eval_id="eval_id", + run_id="run_id", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + output_item = await async_client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + output_item = await async_client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="fail", + ) + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.output_items.with_streaming_response.list( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = await response.parse() + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.list( + run_id="", + eval_id="eval_id", + ) diff --git a/tests/api_resources/evals/test_runs.py b/tests/api_resources/evals/test_runs.py new 
file mode 100644 index 0000000000..cefb1c82ff --- /dev/null +++ b/tests/api_resources/evals/test_runs.py @@ -0,0 +1,589 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.evals import ( + RunListResponse, + RunCancelResponse, + RunCreateResponse, + RunDeleteResponse, + RunRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestRuns: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + run = client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + run = client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [ + { + "item": {"foo": "bar"}, + "sample": {"foo": "bar"}, + } + ], + "type": "file_content", + }, + "type": "jsonl", + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.create( + eval_id="", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + run = client.evals.runs.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + 
assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.retrieve( + run_id="", + eval_id="eval_id", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + run = client.evals.runs.list( + eval_id="eval_id", + ) + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + run = client.evals.runs.list( + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="queued", + ) + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.list( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.list( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.list( + eval_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + run = client.evals.runs.delete( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.delete( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) 
is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.delete( + run_id="", + eval_id="eval_id", + ) + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + run = client.evals.runs.cancel( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.cancel( + run_id="", + eval_id="eval_id", + ) + + +class TestAsyncRuns: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [ + { + "item": {"foo": "bar"}, + "sample": {"foo": "bar"}, + } + ], + "type": "file_content", + }, + "type": "jsonl", + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.evals.runs.with_streaming_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.create( + eval_id="", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.retrieve( + run_id="", + eval_id="eval_id", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.list( + eval_id="eval_id", + ) + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.list( + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="queued", + ) + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.list( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, 
async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.list( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.list( + eval_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.delete( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.delete( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.delete( + run_id="", + eval_id="eval_id", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.cancel( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.cancel( + run_id="", + eval_id="eval_id", + ) diff --git a/tests/api_resources/fine_tuning/__init__.py b/tests/api_resources/fine_tuning/__init__.py index 1016754ef3..fd8019a9a1 100644 --- a/tests/api_resources/fine_tuning/__init__.py +++ b/tests/api_resources/fine_tuning/__init__.py @@ -1 +1 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/alpha/__init__.py b/tests/api_resources/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/alpha/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/alpha/test_graders.py b/tests/api_resources/fine_tuning/alpha/test_graders.py new file mode 100644 index 0000000000..b144c78c74 --- /dev/null +++ b/tests/api_resources/fine_tuning/alpha/test_graders.py @@ -0,0 +1,289 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.fine_tuning.alpha import ( + GraderRunResponse, + GraderValidateResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestGraders: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_run(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_method_run_with_all_params(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_raw_response_run(self, client: OpenAI) -> None: + response = client.fine_tuning.alpha.graders.with_raw_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_streaming_response_run(self, client: OpenAI) -> None: + with client.fine_tuning.alpha.graders.with_streaming_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": 
"string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_validate(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + def test_method_validate_with_all_params(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + def test_raw_response_validate(self, client: OpenAI) -> None: + response = client.fine_tuning.alpha.graders.with_raw_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + def test_streaming_response_validate(self, client: OpenAI) -> None: + with client.fine_tuning.alpha.graders.with_streaming_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncGraders: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_run(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_method_run_with_all_params(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_raw_response_run(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.alpha.graders.with_raw_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_streaming_response_run(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.alpha.graders.with_streaming_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + reference_answer="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = await response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_validate(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_method_validate_with_all_params(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_raw_response_validate(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.alpha.graders.with_raw_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_streaming_response_validate(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.alpha.graders.with_streaming_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = await response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/fine_tuning/checkpoints/__init__.py b/tests/api_resources/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/checkpoints/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/checkpoints/test_permissions.py b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py new file mode 100644 index 0000000000..6aa0b867d9 --- /dev/null +++ b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py @@ -0,0 +1,317 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncPage, AsyncPage +from openai.types.fine_tuning.checkpoints import ( + PermissionCreateResponse, + PermissionDeleteResponse, + PermissionRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestPermissions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="", + project_ids=["string"], + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", + limit=0, + order="ascending", + project_id="project_id", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> 
None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"): + client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + +class TestAsyncPermissions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create( + 
fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="", + project_ids=["string"], + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", + limit=0, + order="ascending", + project_id="project_id", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + permission = await 
async_client.fine_tuning.checkpoints.permissions.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/jobs/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py new file mode 100644 index 0000000000..915d5c6f63 --- /dev/null +++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py @@ -0,0 +1,117 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestCheckpoints: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + checkpoint = client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + checkpoint = client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="string", + limit=0, + ) + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + checkpoint = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.checkpoints.with_streaming_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + checkpoint = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "", + ) + + +class TestAsyncCheckpoints: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="string", + limit=0, + ) + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + checkpoint = response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + @parametrize + async def 
test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + checkpoint = await response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( + "", + ) diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py index 5716a23d54..4589f12846 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -1,30 +1,30 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent +from openai.types.fine_tuning import ( + FineTuningJob, + FineTuningJobEvent, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestJobs: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @@ -32,13 +32,62 @@ def test_method_create(self, client: OpenAI) -> None: @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", "learning_rate_multiplier": "auto", "n_epochs": "auto", }, + integrations=[ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], + }, + } + ], + metadata={"foo": "string"}, + method={ + "type": "supervised", + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "reinforcement": { + "grader": { + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + "hyperparameters": { + "batch_size": "auto", + "compute_multiplier": "auto", + "eval_interval": "auto", + "eval_samples": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + "reasoning_effort": "default", + }, + }, + "supervised": { + 
"hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + }, + seed=42, suffix="x", validation_file="file-abc123", ) @@ -47,13 +96,29 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.retrieve( @@ -66,10 +131,32 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.retrieve( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.retrieve( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.with_raw_response.retrieve( + "", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list() @@ -80,16 +167,30 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list( after="string", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"]) + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_cancel(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.cancel( @@ -102,10 
+203,32 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.cancel( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.cancel( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.with_raw_response.cancel( + "", + ) + @parametrize def test_method_list_events(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list_events( @@ -127,111 +250,330 @@ def test_raw_response_list_events(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.list_events( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) + @parametrize + def test_streaming_response_list_events(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.list_events( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list_events(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.with_raw_response.list_events( + "", + ) + + @parametrize + def test_method_pause(self, client: OpenAI) -> None: + job = client.fine_tuning.jobs.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_raw_response_pause(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.with_raw_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_streaming_response_pause(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_pause(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + 
client.fine_tuning.jobs.with_raw_response.pause( + "", + ) + + @parametrize + def test_method_resume(self, client: OpenAI) -> None: + job = client.fine_tuning.jobs.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_raw_response_resume(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.with_raw_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_streaming_response_resume(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_resume(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.with_raw_response.resume( + "", + ) + class TestAsyncJobs: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.create( + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.create( + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", "learning_rate_multiplier": "auto", "n_epochs": "auto", }, + integrations=[ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], + }, + } + ], + metadata={"foo": "string"}, + method={ + "type": "supervised", + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "reinforcement": { + "grader": { + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + "hyperparameters": { + "batch_size": "auto", + "compute_multiplier": "auto", + "eval_interval": "auto", + "eval_samples": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + "reasoning_effort": "default", + }, + }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + }, + seed=42, 
suffix="x", validation_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.create( + model="gpt-4o-mini", training_file="file-abc123", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.retrieve( + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.retrieve( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.fine_tuning.jobs.with_raw_response.retrieve( + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.retrieve( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.list() + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.retrieve( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.list() assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"]) @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.list( + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.list( after="string", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(AsyncCursorPage[FineTuningJob], job, 
path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.fine_tuning.jobs.with_raw_response.list() + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"]) @parametrize - async def test_method_cancel(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.cancel( + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.cancel( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_raw_response_cancel(self, client: AsyncOpenAI) -> None: - response = await client.fine_tuning.jobs.with_raw_response.cancel( + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.cancel( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(FineTuningJob, job, path=["response"]) @parametrize - async def test_method_list_events(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.list_events( + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.cancel( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.cancel( + "", + ) + + @parametrize + async def test_method_list_events(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.list_events( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) @parametrize - async def test_method_list_events_with_all_params(self, client: AsyncOpenAI) -> None: - job = await client.fine_tuning.jobs.list_events( + async def test_method_list_events_with_all_params(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.list_events( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", after="string", limit=0, @@ -239,10 +581,108 @@ async def test_method_list_events_with_all_params(self, client: AsyncOpenAI) -> assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) 
@parametrize - async def test_raw_response_list_events(self, client: AsyncOpenAI) -> None: - response = await client.fine_tuning.jobs.with_raw_response.list_events( + async def test_raw_response_list_events(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.list_events( "ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" job = response.parse() assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) + + @parametrize + async def test_streaming_response_list_events(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.list_events( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list_events(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.list_events( + "", + ) + + @parametrize + async def test_method_pause(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_raw_response_pause(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_streaming_response_pause(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_pause(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.pause( + "", + ) + + @parametrize + async def test_method_resume(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_raw_response_resume(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_streaming_response_resume(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.fine_tuning.jobs.with_streaming_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_resume(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.resume( + "", + ) diff --git a/tests/api_resources/responses/__init__.py b/tests/api_resources/responses/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/responses/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py new file mode 100644 index 0000000000..2528943c06 --- /dev/null +++ b/tests/api_resources/responses/test_input_items.py @@ -0,0 +1,123 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.responses import ResponseItem + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestInputItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + after="after", + before="before", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = response.parse() + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.input_items.with_raw_response.list( + response_id="", + ) + + +class 
TestAsyncInputItems: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + after="after", + before="before", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = await response.parse() + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.input_items.with_raw_response.list( + response_id="", + ) diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py new file mode 100644 index 0000000000..a2f8fb48a3 --- /dev/null +++ b/tests/api_resources/test_batches.py @@ -0,0 +1,335 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import Batch +from openai.pagination import SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestBatches: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + batch = client.batches.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + batch = client.batches.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + metadata={"foo": "string"}, + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + batch = client.batches.retrieve( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + batch = client.batches.list() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + batch = client.batches.list( + after="string", + limit=0, + ) + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def test_raw_response_list(self, 
client: OpenAI) -> None: + response = client.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + batch = client.batches.cancel( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.batches.with_raw_response.cancel( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.batches.with_streaming_response.cancel( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.batches.with_raw_response.cancel( + "", + ) + + +class TestAsyncBatches: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + metadata={"foo": "string"}, + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + 
@parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.retrieve( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.retrieve( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.list() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.list( + after="string", + limit=0, + ) + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + batch = await async_client.batches.cancel( + "string", + ) + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.batches.with_raw_response.cancel( + "string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.batches.with_streaming_response.cancel( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(Batch, batch, path=["response"]) + + assert 
cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.batches.with_raw_response.cancel( + "", + ) diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index b12fd6401e..9ec503c1e3 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -1,24 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import Completion -from openai._client import OpenAI, AsyncOpenAI base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestCompletions: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create_overload_1(self, client: OpenAI) -> None: @@ -41,9 +38,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, @@ -57,21 +55,38 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: model="string", prompt="This is a test.", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" completion = response.parse() assert_matches_type(Completion, completion, path=["response"]) + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.completions.with_streaming_response.create( + model="string", + prompt="This is a test.", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(Completion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_create_overload_2(self, client: OpenAI) -> None: - client.completions.create( + completion_stream = client.completions.create( model="string", prompt="This is a test.", stream=True, ) + completion_stream.response.close() @parametrize def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: - client.completions.create( + completion_stream = client.completions.create( model="string", prompt="This is a test.", stream=True, @@ -83,13 +98,15 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, user="user-1234", ) + completion_stream.response.close() @parametrize def test_raw_response_create_overload_2(self, client: OpenAI) -> None: @@ -98,26 +115,41 
@@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: prompt="This is a test.", stream=True, ) + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.completions.with_streaming_response.create( + model="string", + prompt="This is a test.", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True class TestAsyncCompletions: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create_overload_1(self, client: AsyncOpenAI) -> None: - completion = await client.completions.create( + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.completions.create( model="string", prompt="This is a test.", ) assert_matches_type(Completion, completion, path=["response"]) @parametrize - async def test_method_create_with_all_params_overload_1(self, client: AsyncOpenAI) -> None: - completion = await client.completions.create( + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.completions.create( model="string", prompt="This is a test.", best_of=0, @@ -128,9 +160,10 @@ async def test_method_create_with_all_params_overload_1(self, client: AsyncOpenA max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, @@ -139,26 +172,43 @@ async def test_method_create_with_all_params_overload_1(self, client: AsyncOpenA assert_matches_type(Completion, completion, path=["response"]) @parametrize - async def test_raw_response_create_overload_1(self, client: AsyncOpenAI) -> None: - response = await client.completions.with_raw_response.create( + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.completions.with_raw_response.create( model="string", prompt="This is a test.", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" completion = response.parse() assert_matches_type(Completion, completion, path=["response"]) @parametrize - async def test_method_create_overload_2(self, client: AsyncOpenAI) -> None: - await client.completions.create( + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.completions.with_streaming_response.create( + model="string", + prompt="This is a test.", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(Completion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def 
test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + completion_stream = await async_client.completions.create( model="string", prompt="This is a test.", stream=True, ) + await completion_stream.response.aclose() @parametrize - async def test_method_create_with_all_params_overload_2(self, client: AsyncOpenAI) -> None: - await client.completions.create( + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + completion_stream = await async_client.completions.create( model="string", prompt="This is a test.", stream=True, @@ -170,20 +220,39 @@ async def test_method_create_with_all_params_overload_2(self, client: AsyncOpenA max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", + stream_options={"include_usage": True}, suffix="test.", temperature=1, top_p=1, user="user-1234", ) + await completion_stream.response.aclose() @parametrize - async def test_raw_response_create_overload_2(self, client: AsyncOpenAI) -> None: - response = await client.completions.with_raw_response.create( + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.completions.with_raw_response.create( model="string", prompt="This is a test.", stream=True, ) + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.completions.with_streaming_response.create( + model="string", + prompt="This is a test.", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_edits.py b/tests/api_resources/test_edits.py deleted file mode 100644 index 76069d6b83..0000000000 --- a/tests/api_resources/test_edits.py +++ /dev/null @@ -1,95 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -import os - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai.types import Edit -from openai._client import OpenAI, AsyncOpenAI - -# pyright: reportDeprecated=false - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" - - -class TestEdits: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - def test_method_create(self, client: OpenAI) -> None: - with pytest.warns(DeprecationWarning): - edit = client.edits.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - ) - assert_matches_type(Edit, edit, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: - with pytest.warns(DeprecationWarning): - edit = client.edits.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - input="What day of the wek is it?", - n=1, - temperature=1, - top_p=1, - ) - assert_matches_type(Edit, edit, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: - with pytest.warns(DeprecationWarning): - response = client.edits.with_raw_response.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - edit = response.parse() - assert_matches_type(Edit, edit, path=["response"]) - - -class TestAsyncEdits: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - with pytest.warns(DeprecationWarning): - edit = await client.edits.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - ) - assert_matches_type(Edit, edit, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - with pytest.warns(DeprecationWarning): - edit = await client.edits.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - input="What day of the wek is it?", - n=1, - temperature=1, - top_p=1, - ) - assert_matches_type(Edit, edit, path=["response"]) - - @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - with pytest.warns(DeprecationWarning): - response = await client.edits.with_raw_response.create( - instruction="Fix the spelling mistakes.", - model="text-davinci-edit-001", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - edit = response.parse() - assert_matches_type(Edit, edit, path=["response"]) diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index faf07ffb7c..e75545b4e2 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -1,30 +1,27 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import CreateEmbeddingResponse -from openai._client import OpenAI, AsyncOpenAI base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestEmbeddings: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: embedding = client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) @@ -32,7 +29,8 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: embedding = client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", + dimensions=1, encoding_format="float", user="user-1234", ) @@ -42,42 +40,73 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.embeddings.with_raw_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = response.parse() assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.embeddings.with_streaming_response.create( + input="The quick brown fox jumped over the lazy dog", + model="text-embedding-3-small", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + embedding = response.parse() + assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncEmbeddings: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - embedding = await client.embeddings.create( + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + embedding = await async_client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - 
embedding = await client.embeddings.create( + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + embedding = await async_client.embeddings.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", + dimensions=1, encoding_format="float", user="user-1234", ) assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.embeddings.with_raw_response.create( + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.embeddings.with_raw_response.create( input="The quick brown fox jumped over the lazy dog", - model="text-embedding-ada-002", + model="text-embedding-3-small", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = response.parse() assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.embeddings.with_streaming_response.create( + input="The quick brown fox jumped over the lazy dog", + model="text-embedding-3-small", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + embedding = await response.parse() + assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py new file mode 100644 index 0000000000..4ae2c597dd --- /dev/null +++ b/tests/api_resources/test_evals.py @@ -0,0 +1,571 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import ( + EvalListResponse, + EvalCreateResponse, + EvalDeleteResponse, + EvalUpdateResponse, + EvalRetrieveResponse, +) +from openai.pagination import SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestEvals: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + eval = client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + "include_sample_schema": True, + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + eval = client.evals.retrieve( + "eval_id", + ) + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.retrieve( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + 
@parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.retrieve( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + eval = client.evals.update( + eval_id="eval_id", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_method_update_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.update( + eval_id="eval_id", + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.update( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.update( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.update( + eval_id="", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + eval = client.evals.list() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.list( + after="after", + limit=0, + order="asc", + order_by="created_at", + ) + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + eval = client.evals.delete( + "eval_id", + ) + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + def 
test_raw_response_delete(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.delete( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.delete( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.delete( + "", + ) + + +class TestAsyncEvals: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + "include_sample_schema": True, + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + 
assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.retrieve( + "eval_id", + ) + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.retrieve( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.retrieve( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.update( + eval_id="eval_id", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.update( + eval_id="eval_id", + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.update( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.update( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.update( + eval_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.list() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.list( + after="after", + limit=0, + order="asc", + order_by="created_at", + ) + 
assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.delete( + "eval_id", + ) + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.delete( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.delete( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index e4cf493319..7402566d95 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -1,36 +1,33 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations import os +from typing import Any, cast import httpx import pytest from respx import MockRouter +import openai._legacy_response as _legacy_response from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import FileObject, FileDeleted -from openai._types import BinaryResponseContent -from openai._client import OpenAI, AsyncOpenAI -from openai.pagination import SyncPage, AsyncPage +from openai.pagination import SyncCursorPage, AsyncCursorPage # pyright: reportDeprecated=false base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestFiles: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: file = client.files.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert_matches_type(FileObject, file, path=["response"]) @@ -38,12 +35,28 @@ def test_method_create(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.files.with_raw_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(FileObject, file, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.files.with_streaming_response.create( + file=b"raw file contents", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(FileObject, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_retrieve(self, client: OpenAI) -> None: file = client.files.retrieve( @@ -56,28 +69,66 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.files.with_raw_response.retrieve( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(FileObject, file, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.files.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(FileObject, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.files.with_raw_response.retrieve( + "", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: file = client.files.list() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def 
test_method_list_with_all_params(self, client: OpenAI) -> None: file = client.files.list( - purpose="string", + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.files.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.files.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) + + assert cast(Any, response.is_closed) is True @parametrize def test_method_delete(self, client: OpenAI) -> None: @@ -91,33 +142,78 @@ def test_raw_response_delete(self, client: OpenAI) -> None: response = client.files.with_raw_response.delete( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(FileDeleted, file, path=["response"]) - @pytest.mark.skip(reason="mocked response isn't working yet") + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.files.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(FileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.files.with_raw_response.delete( + "", + ) + @parametrize @pytest.mark.respx(base_url=base_url) def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/{file_id}/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) file = client.files.content( "string", ) - assert isinstance(file, BinaryResponseContent) + assert isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} - @pytest.mark.skip(reason="mocked response isn't working yet") @parametrize @pytest.mark.respx(base_url=base_url) def test_raw_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/{file_id}/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + response = client.files.with_raw_response.content( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert isinstance(file, BinaryResponseContent) - assert file.json() == {"foo": "bar"} + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, file, path=["response"]) 
+ + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_streaming_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + with client.files.with_streaming_response.content( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(bytes, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_path_params_content(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.files.with_raw_response.content( + "", + ) @parametrize def test_method_retrieve_content(self, client: OpenAI) -> None: @@ -125,6 +221,7 @@ def test_method_retrieve_content(self, client: OpenAI) -> None: file = client.files.retrieve_content( "string", ) + assert_matches_type(str, file, path=["response"]) @parametrize @@ -133,123 +230,269 @@ def test_raw_response_retrieve_content(self, client: OpenAI) -> None: response = client.files.with_raw_response.retrieve_content( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(str, file, path=["response"]) + @parametrize + def test_streaming_response_retrieve_content(self, client: OpenAI) -> None: + with pytest.warns(DeprecationWarning): + with client.files.with_streaming_response.retrieve_content( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(str, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve_content(self, client: OpenAI) -> None: + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.files.with_raw_response.retrieve_content( + "", + ) + class TestAsyncFiles: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - file = await client.files.create( + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.files.with_raw_response.create( + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.files.with_raw_response.create( file=b"raw file contents", - purpose="fine-tune", + purpose="assistants", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() 
assert_matches_type(FileObject, file, path=["response"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - file = await client.files.retrieve( + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.files.with_streaming_response.create( + file=b"raw file contents", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(FileObject, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.retrieve( "string", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.files.with_raw_response.retrieve( + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.files.with_raw_response.retrieve( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(FileObject, file, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - file = await client.files.list() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.files.with_streaming_response.retrieve( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(FileObject, file, path=["response"]) + + assert cast(Any, response.is_closed) is True @parametrize - async def test_method_list_with_all_params(self, client: AsyncOpenAI) -> None: - file = await client.files.list( - purpose="string", + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.files.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.list() + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.list( + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.files.with_raw_response.list() + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.files.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize - async def test_method_delete(self, client: AsyncOpenAI) -> 
None: - file = await client.files.delete( + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.files.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.delete( "string", ) assert_matches_type(FileDeleted, file, path=["response"]) @parametrize - async def test_raw_response_delete(self, client: AsyncOpenAI) -> None: - response = await client.files.with_raw_response.delete( + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.files.with_raw_response.delete( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(FileDeleted, file, path=["response"]) - @pytest.mark.skip(reason="mocked response isn't working yet") + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.files.with_streaming_response.delete( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(FileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.files.with_raw_response.delete( + "", + ) + @parametrize @pytest.mark.respx(base_url=base_url) - async def test_method_content(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/{file_id}/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - file = await client.files.content( + async def test_method_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + file = await async_client.files.content( "string", ) - assert isinstance(file, BinaryResponseContent) + assert isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} - @pytest.mark.skip(reason="mocked response isn't working yet") @parametrize @pytest.mark.respx(base_url=base_url) - async def test_raw_response_content(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/{file_id}/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await client.files.with_raw_response.content( + async def test_raw_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + + response = await async_client.files.with_raw_response.content( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert isinstance(file, BinaryResponseContent) - assert file.json() == {"foo": "bar"} + 
assert_matches_type(_legacy_response.HttpxBinaryResponseContent, file, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_streaming_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + async with async_client.files.with_streaming_response.content( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(bytes, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.files.with_raw_response.content( + "", + ) @parametrize - async def test_method_retrieve_content(self, client: AsyncOpenAI) -> None: + async def test_method_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): - file = await client.files.retrieve_content( + file = await async_client.files.retrieve_content( "string", ) + assert_matches_type(str, file, path=["response"]) @parametrize - async def test_raw_response_retrieve_content(self, client: AsyncOpenAI) -> None: + async def test_raw_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): - response = await client.files.with_raw_response.retrieve_content( + response = await async_client.files.with_raw_response.retrieve_content( "string", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() assert_matches_type(str, file, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: + with pytest.warns(DeprecationWarning): + async with async_client.files.with_streaming_response.retrieve_content( + "string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(str, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve_content(self, async_client: AsyncOpenAI) -> None: + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.files.with_raw_response.retrieve_content( + "", + ) diff --git a/tests/api_resources/test_fine_tunes.py b/tests/api_resources/test_fine_tunes.py deleted file mode 100644 index edaf784848..0000000000 --- a/tests/api_resources/test_fine_tunes.py +++ /dev/null @@ -1,274 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
- -from __future__ import annotations - -import os - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai.types import FineTune, FineTuneEventsListResponse -from openai._client import OpenAI, AsyncOpenAI -from openai.pagination import SyncPage, AsyncPage - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" - - -class TestFineTunes: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - def test_method_create(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.create( - training_file="file-abc123", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.create( - training_file="file-abc123", - batch_size=0, - classification_betas=[0.6, 1, 1.5, 2], - classification_n_classes=0, - classification_positive_class="string", - compute_classification_metrics=True, - hyperparameters={"n_epochs": "auto"}, - learning_rate_multiplier=0, - model="curie", - prompt_loss_weight=0, - suffix="x", - validation_file="file-abc123", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.create( - training_file="file-abc123", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_method_retrieve(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.retrieve( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.retrieve( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_method_list(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.list() - assert_matches_type(SyncPage[FineTune], fine_tune, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.list() - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(SyncPage[FineTune], fine_tune, path=["response"]) - - @parametrize - def test_method_cancel(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.cancel( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - def test_raw_response_cancel(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.cancel( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - def 
test_method_list_events_overload_1(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - def test_method_list_events_with_all_params_overload_1(self, client: OpenAI) -> None: - fine_tune = client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=False, - ) - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - def test_raw_response_list_events_overload_1(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - def test_method_list_events_overload_2(self, client: OpenAI) -> None: - client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=True, - ) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - def test_raw_response_list_events_overload_2(self, client: OpenAI) -> None: - response = client.fine_tunes.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=True, - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() - - -class TestAsyncFineTunes: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) - - @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.create( - training_file="file-abc123", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.create( - training_file="file-abc123", - batch_size=0, - classification_betas=[0.6, 1, 1.5, 2], - classification_n_classes=0, - classification_positive_class="string", - compute_classification_metrics=True, - hyperparameters={"n_epochs": "auto"}, - learning_rate_multiplier=0, - model="curie", - prompt_loss_weight=0, - suffix="x", - validation_file="file-abc123", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.create( - training_file="file-abc123", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.retrieve( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.retrieve( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.list() - assert_matches_type(AsyncPage[FineTune], fine_tune, path=["response"]) - - @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.list() - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(AsyncPage[FineTune], fine_tune, path=["response"]) - - @parametrize - async def test_method_cancel(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.cancel( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @parametrize - async def test_raw_response_cancel(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.cancel( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTune, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - async def test_method_list_events_overload_1(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - async def test_method_list_events_with_all_params_overload_1(self, client: AsyncOpenAI) -> None: - fine_tune = await client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=False, - ) - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - async def test_raw_response_list_events_overload_1(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - fine_tune = response.parse() - assert_matches_type(FineTuneEventsListResponse, fine_tune, path=["response"]) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - async def test_method_list_events_overload_2(self, client: AsyncOpenAI) -> None: - await client.fine_tunes.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=True, - ) - - @pytest.mark.skip(reason="Prism chokes on this") - @parametrize - async def test_raw_response_list_events_overload_2(self, client: AsyncOpenAI) -> None: - response = await client.fine_tunes.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - stream=True, - ) - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - response.parse() diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py index c7f5e5bcd2..7c61453bc1 100644 --- a/tests/api_resources/test_images.py +++ b/tests/api_resources/test_images.py @@ -1,24 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import ImagesResponse -from openai._client import OpenAI, AsyncOpenAI base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestImages: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create_variation(self, client: OpenAI) -> None: @@ -31,7 +28,7 @@ def test_method_create_variation(self, client: OpenAI) -> None: def test_method_create_variation_with_all_params(self, client: OpenAI) -> None: image = client.images.create_variation( image=b"raw file contents", - model="dall-e-2", + model="string", n=1, response_format="url", size="1024x1024", @@ -44,10 +41,25 @@ def test_raw_response_create_variation(self, client: OpenAI) -> None: response = client.images.with_raw_response.create_variation( image=b"raw file contents", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) + @parametrize + def test_streaming_response_create_variation(self, client: OpenAI) -> None: + with client.images.with_streaming_response.create_variation( + image=b"raw file contents", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = response.parse() + assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_edit(self, client: OpenAI) -> None: image = client.images.edit( @@ -61,9 +73,11 @@ def test_method_edit_with_all_params(self, client: OpenAI) -> None: image = client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", + background="transparent", mask=b"raw file contents", - model="dall-e-2", + model="string", n=1, + quality="high", response_format="url", size="1024x1024", user="user-1234", @@ -76,10 +90,26 @@ def test_raw_response_edit(self, client: OpenAI) -> None: image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) + @parametrize + def test_streaming_response_edit(self, client: OpenAI) -> None: + with client.images.with_streaming_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = response.parse() + assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_generate(self, client: OpenAI) -> None: image = client.images.generate( @@ -91,9 +121,13 @@ def test_method_generate(self, client: OpenAI) -> None: def test_method_generate_with_all_params(self, client: OpenAI) -> None: 
image = client.images.generate( prompt="A cute baby sea otter", - model="dall-e-3", + background="transparent", + model="string", + moderation="low", n=1, - quality="standard", + output_compression=100, + output_format="png", + quality="medium", response_format="url", size="1024x1024", style="vivid", @@ -106,28 +140,41 @@ def test_raw_response_generate(self, client: OpenAI) -> None: response = client.images.with_raw_response.generate( prompt="A cute baby sea otter", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) + @parametrize + def test_streaming_response_generate(self, client: OpenAI) -> None: + with client.images.with_streaming_response.generate( + prompt="A cute baby sea otter", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = response.parse() + assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncImages: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create_variation(self, client: AsyncOpenAI) -> None: - image = await client.images.create_variation( + async def test_method_create_variation(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.create_variation( image=b"raw file contents", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_create_variation_with_all_params(self, client: AsyncOpenAI) -> None: - image = await client.images.create_variation( + async def test_method_create_variation_with_all_params(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.create_variation( image=b"raw file contents", - model="dall-e-2", + model="string", n=1, response_format="url", size="1024x1024", @@ -136,30 +183,47 @@ async def test_method_create_variation_with_all_params(self, client: AsyncOpenAI assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_raw_response_create_variation(self, client: AsyncOpenAI) -> None: - response = await client.images.with_raw_response.create_variation( + async def test_raw_response_create_variation(self, async_client: AsyncOpenAI) -> None: + response = await async_client.images.with_raw_response.create_variation( image=b"raw file contents", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_edit(self, client: AsyncOpenAI) -> None: - image = await client.images.edit( + async def test_streaming_response_create_variation(self, async_client: AsyncOpenAI) -> None: + async with async_client.images.with_streaming_response.create_variation( + image=b"raw file contents", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = await response.parse() + 
assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_edit(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_edit_with_all_params(self, client: AsyncOpenAI) -> None: - image = await client.images.edit( + async def test_method_edit_with_all_params(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", + background="transparent", mask=b"raw file contents", - model="dall-e-2", + model="string", n=1, + quality="high", response_format="url", size="1024x1024", user="user-1234", @@ -167,29 +231,49 @@ async def test_method_edit_with_all_params(self, client: AsyncOpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_raw_response_edit(self, client: AsyncOpenAI) -> None: - response = await client.images.with_raw_response.edit( + async def test_raw_response_edit(self, async_client: AsyncOpenAI) -> None: + response = await async_client.images.with_raw_response.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_generate(self, client: AsyncOpenAI) -> None: - image = await client.images.generate( + async def test_streaming_response_edit(self, async_client: AsyncOpenAI) -> None: + async with async_client.images.with_streaming_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = await response.parse() + assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_generate(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.generate( prompt="A cute baby sea otter", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_generate_with_all_params(self, client: AsyncOpenAI) -> None: - image = await client.images.generate( + async def test_method_generate_with_all_params(self, async_client: AsyncOpenAI) -> None: + image = await async_client.images.generate( prompt="A cute baby sea otter", - model="dall-e-3", + background="transparent", + model="string", + moderation="low", n=1, - quality="standard", + output_compression=100, + output_format="png", + quality="medium", response_format="url", size="1024x1024", style="vivid", @@ -198,10 +282,25 @@ async def test_method_generate_with_all_params(self, client: AsyncOpenAI) -> Non assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_raw_response_generate(self, client: AsyncOpenAI) -> None: - response = await client.images.with_raw_response.generate( + async def test_raw_response_generate(self, async_client: AsyncOpenAI) -> None: + response = await async_client.images.with_raw_response.generate( prompt="A cute baby sea otter", ) + + assert response.is_closed is True assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" image = response.parse() assert_matches_type(ImagesResponse, image, path=["response"]) + + @parametrize + async def test_streaming_response_generate(self, async_client: AsyncOpenAI) -> None: + async with async_client.images.with_streaming_response.generate( + prompt="A cute baby sea otter", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + image = await response.parse() + assert_matches_type(ImagesResponse, image, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index 3998809610..8791507c3e 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -1,42 +1,61 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import Model, ModelDeleted -from openai._client import OpenAI, AsyncOpenAI from openai.pagination import SyncPage, AsyncPage base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestModels: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: model = client.models.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(Model, model, path=["response"]) + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.models.with_streaming_response.retrieve( + "gpt-4o-mini", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(Model, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): + client.models.with_raw_response.retrieve( + "", + ) + @parametrize def test_method_list(self, client: OpenAI) -> None: model = client.models.list() @@ -45,72 +64,162 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.models.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(SyncPage[Model], model, path=["response"]) + @parametrize + def 
test_streaming_response_list(self, client: OpenAI) -> None: + with client.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(SyncPage[Model], model, path=["response"]) + + assert cast(Any, response.is_closed) is True + @parametrize def test_method_delete(self, client: OpenAI) -> None: model = client.models.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(ModelDeleted, model, path=["response"]) + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.models.with_streaming_response.delete( + "ft:gpt-4o-mini:acemeco:suffix:abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(ModelDeleted, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): + client.models.with_raw_response.delete( + "", + ) + class TestAsyncModels: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_retrieve(self, client: AsyncOpenAI) -> None: - model = await client.models.retrieve( - "gpt-3.5-turbo", + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + model = await async_client.models.retrieve( + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize - async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None: - response = await client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.models.with_raw_response.retrieve( + "gpt-4o-mini", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(Model, model, path=["response"]) @parametrize - async def test_method_list(self, client: AsyncOpenAI) -> None: - model = await client.models.list() + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.models.with_streaming_response.retrieve( + "gpt-4o-mini", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(Model, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + 
@parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): + await async_client.models.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + model = await async_client.models.list() assert_matches_type(AsyncPage[Model], model, path=["response"]) @parametrize - async def test_raw_response_list(self, client: AsyncOpenAI) -> None: - response = await client.models.with_raw_response.list() + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.models.with_raw_response.list() + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(AsyncPage[Model], model, path=["response"]) @parametrize - async def test_method_delete(self, client: AsyncOpenAI) -> None: - model = await client.models.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(AsyncPage[Model], model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + model = await async_client.models.delete( + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize - async def test_raw_response_delete(self, client: AsyncOpenAI) -> None: - response = await client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.models.with_raw_response.delete( + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() assert_matches_type(ModelDeleted, model, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.models.with_streaming_response.delete( + "ft:gpt-4o-mini:acemeco:suffix:abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(ModelDeleted, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model` but received ''"): + await async_client.models.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_moderations.py b/tests/api_resources/test_moderations.py index 502030d614..6df6464110 100644 --- a/tests/api_resources/test_moderations.py +++ b/tests/api_resources/test_moderations.py @@ -1,24 +1,21 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations import os +from typing import Any, cast import pytest from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import ModerationCreateResponse -from openai._client import OpenAI, AsyncOpenAI base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") -api_key = "My API Key" class TestModerations: - strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize def test_method_create(self, client: OpenAI) -> None: @@ -31,7 +28,7 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: moderation = client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="string", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) @@ -40,36 +37,64 @@ def test_raw_response_create(self, client: OpenAI) -> None: response = client.moderations.with_raw_response.create( input="I want to kill them.", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" moderation = response.parse() assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.moderations.with_streaming_response.create( + input="I want to kill them.", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + moderation = response.parse() + assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncModerations: - strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"]) + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - async def test_method_create(self, client: AsyncOpenAI) -> None: - moderation = await client.moderations.create( + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + moderation = await async_client.moderations.create( input="I want to kill them.", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, client: AsyncOpenAI) -> None: - moderation = await client.moderations.create( + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + moderation = await async_client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="string", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) @parametrize - async def test_raw_response_create(self, client: AsyncOpenAI) -> None: - response = await client.moderations.with_raw_response.create( + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response 
= await async_client.moderations.with_raw_response.create( input="I want to kill them.", ) + + assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" moderation = response.parse() assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.moderations.with_streaming_response.create( + input="I want to kill them.", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + moderation = await response.parse() + assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py new file mode 100644 index 0000000000..3753af8fdb --- /dev/null +++ b/tests/api_resources/test_responses.py @@ -0,0 +1,506 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.responses import Response + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestResponses: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: OpenAI) -> None: + response = client.responses.create( + input="string", + model="gpt-4o", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: + response = client.responses.create( + input="string", + model="gpt-4o", + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "auto", + "summary": "auto", + }, + service_tier="auto", + store=True, + stream=False, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, 
path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.create( + input="string", + model="gpt-4o", + stream=True, + ) + response_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.create( + input="string", + model="gpt-4o", + stream=True, + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "auto", + "summary": "auto", + }, + service_tier="auto", + store=True, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + response_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a 
non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + response = client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert response is None + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.delete( + "", + ) + + +class TestAsyncResponses: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create( + input="string", + model="gpt-4o", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create( + input="string", + model="gpt-4o", + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "auto", + "summary": "auto", + }, + service_tier="auto", + store=True, + stream=False, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await 
http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.create( + input="string", + model="gpt-4o", + stream=True, + ) + await response_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.create( + input="string", + model="gpt-4o", + stream=True, + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "auto", + "summary": "auto", + }, + service_tier="auto", + store=True, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + await response_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert 
http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert response is None + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_uploads.py b/tests/api_resources/test_uploads.py new file mode 100644 index 0000000000..a14c4f8da2 --- /dev/null +++ b/tests/api_resources/test_uploads.py @@ -0,0 +1,280 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types import Upload
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestUploads:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: OpenAI) -> None:
+        upload = client.uploads.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: OpenAI) -> None:
+        response = client.uploads.with_raw_response.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.uploads.with_streaming_response.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_cancel(self, client: OpenAI) -> None:
+        upload = client.uploads.cancel(
+            "upload_abc123",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_raw_response_cancel(self, client: OpenAI) -> None:
+        response = client.uploads.with_raw_response.cancel(
+            "upload_abc123",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_streaming_response_cancel(self, client: OpenAI) -> None:
+        with client.uploads.with_streaming_response.cancel(
+            "upload_abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_cancel(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            client.uploads.with_raw_response.cancel(
+                "",
+            )
+
+    @parametrize
+    def test_method_complete(self, client: OpenAI) -> None:
+        upload = client.uploads.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_method_complete_with_all_params(self, client: OpenAI) -> None:
+        upload = client.uploads.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+            md5="md5",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_raw_response_complete(self, client: OpenAI) -> None:
+        response = client.uploads.with_raw_response.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    def test_streaming_response_complete(self, client: OpenAI) -> None:
+        with client.uploads.with_streaming_response.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_complete(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            client.uploads.with_raw_response.complete(
+                upload_id="",
+                part_ids=["string"],
+            )
+
+
+class TestAsyncUploads:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+        upload = await async_client.uploads.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.uploads.with_raw_response.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.uploads.with_streaming_response.create(
+            bytes=0,
+            filename="filename",
+            mime_type="mime_type",
+            purpose="assistants",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = await response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_cancel(self, async_client: AsyncOpenAI) -> None:
+        upload = await async_client.uploads.cancel(
+            "upload_abc123",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.uploads.with_raw_response.cancel(
+            "upload_abc123",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.uploads.with_streaming_response.cancel(
+            "upload_abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = await response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            await async_client.uploads.with_raw_response.cancel(
+                "",
+            )
+
+    @parametrize
+    async def test_method_complete(self, async_client: AsyncOpenAI) -> None:
+        upload = await async_client.uploads.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_method_complete_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        upload = await async_client.uploads.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+            md5="md5",
+        )
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_raw_response_complete(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.uploads.with_raw_response.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        upload = response.parse()
+        assert_matches_type(Upload, upload, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_complete(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.uploads.with_streaming_response.complete(
+            upload_id="upload_abc123",
+            part_ids=["string"],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            upload = await response.parse()
+            assert_matches_type(Upload, upload, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_complete(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            await async_client.uploads.with_raw_response.complete(
+                upload_id="",
+                part_ids=["string"],
+            )
diff --git a/tests/api_resources/test_vector_stores.py b/tests/api_resources/test_vector_stores.py
new file mode 100644
index 0000000000..54bb75bc1d
--- /dev/null
+++ b/tests/api_resources/test_vector_stores.py
@@ -0,0 +1,551 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import ( + VectorStore, + VectorStoreDeleted, + VectorStoreSearchResponse, +) +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestVectorStores: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + vector_store = client.vector_stores.create() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + vector_store = client.vector_stores.create( + chunking_strategy={"type": "auto"}, + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + file_ids=["string"], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + vector_store = client.vector_stores.retrieve( + "vector_store_id", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.retrieve( + "vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + vector_store = client.vector_stores.update( + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_method_update_with_all_params(self, client: OpenAI) -> None: + vector_store 
= client.vector_stores.update( + vector_store_id="vector_store_id", + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.update( + vector_store_id="", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + vector_store = client.vector_stores.list() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + vector_store = client.vector_stores.list( + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + vector_store = client.vector_stores.delete( + "vector_store_id", + ) + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.delete( + "vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.delete( + "vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + 
vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.delete( + "", + ) + + @parametrize + def test_method_search(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_method_search_with_all_params(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + ranking_options={ + "ranker": "auto", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_raw_response_search(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_streaming_response_search(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_search(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) + + +class TestAsyncVectorStores: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.create() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.create( + chunking_strategy={"type": "auto"}, + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + file_ids=["string"], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, 
async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.retrieve( + "vector_store_id", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.retrieve( + "vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", + expires_after={ + "anchor": "last_active_at", + "days": 1, + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStore, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but 
received ''"): + await async_client.vector_stores.with_raw_response.update( + vector_store_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.list() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.list( + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.delete( + "vector_store_id", + ) + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.delete( + "vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.delete( + "vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.with_raw_response.delete( + "", + ) + + @parametrize + async def test_method_search(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_method_search_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + ranking_options={ + 
"ranker": "auto", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_raw_response_search(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_search(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_search(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) diff --git a/tests/api_resources/uploads/__init__.py b/tests/api_resources/uploads/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/uploads/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/uploads/test_parts.py b/tests/api_resources/uploads/test_parts.py new file mode 100644 index 0000000000..2bba241a6d --- /dev/null +++ b/tests/api_resources/uploads/test_parts.py @@ -0,0 +1,106 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.uploads import UploadPart
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestParts:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: OpenAI) -> None:
+        part = client.uploads.parts.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: OpenAI) -> None:
+        response = client.uploads.parts.with_raw_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        part = response.parse()
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.uploads.parts.with_streaming_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            part = response.parse()
+            assert_matches_type(UploadPart, part, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_create(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            client.uploads.parts.with_raw_response.create(
+                upload_id="",
+                data=b"raw file contents",
+            )
+
+
+class TestAsyncParts:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+        part = await async_client.uploads.parts.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.uploads.parts.with_raw_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        part = response.parse()
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.uploads.parts.with_streaming_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            part = await response.parse()
+            assert_matches_type(UploadPart, part, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            await async_client.uploads.parts.with_raw_response.create(
+                upload_id="",
+                data=b"raw file contents",
+            )
diff --git a/tests/api_resources/vector_stores/__init__.py b/tests/api_resources/vector_stores/__init__.py
new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/vector_stores/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py new file mode 100644 index 0000000000..0587cfc56a --- /dev/null +++ b/tests/api_resources/vector_stores/test_file_batches.py @@ -0,0 +1,444 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.vector_stores import ( + VectorStoreFile, + VectorStoreFileBatch, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestFileBatches: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", + file_ids=["string"], + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + def test_method_list_files(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + def test_method_list_files_with_all_params(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + 
@parametrize + def test_raw_response_list_files(self, client: OpenAI) -> None: + response = client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + def test_streaming_response_list_files(self, client: OpenAI) -> None: + with client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list_files(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", + ) + + +class TestAsyncFileBatches: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await 
async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", + file_ids=["string"], + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await 
async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + async def test_method_list_files(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file_batch = response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + @parametrize + async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file_batch = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list_files(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", + ) diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py new file mode 100644 index 0000000000..c13442261e --- /dev/null +++ b/tests/api_resources/vector_stores/test_files.py @@ -0,0 +1,625 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from openai.types.vector_stores import ( + VectorStoreFile, + FileContentResponse, + VectorStoreFileDeleted, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestFiles: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + file = client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + file = client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + 
assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + file = client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + def test_method_content(self, client: OpenAI) -> None: + file = client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_raw_response_content(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_streaming_response_content(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_content(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, 
match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) + + +class TestAsyncFiles: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, 
match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + async def test_method_content(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_raw_response_content(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_streaming_response_content(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) diff --git a/tests/conftest.py b/tests/conftest.py index c3a1efe9df..8b01753e2f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,51 @@ -import asyncio +from __future__ import annotations + +import os import logging -from typing import Iterator +from typing import TYPE_CHECKING, Iterator, AsyncIterator import pytest +from pytest_asyncio import is_async_test + +from openai import OpenAI, AsyncOpenAI + +if TYPE_CHECKING: + from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] pytest.register_assert_rewrite("tests.utils") logging.getLogger("openai").setLevel(logging.DEBUG) +# automatically add `pytest.mark.asyncio()` to all of our async tests +# so we don't have to add that boilerplate everywhere +def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: + pytest_asyncio_tests = (item for item in items if is_async_test(item)) + session_scope_marker = pytest.mark.asyncio(loop_scope="session") + for async_test in pytest_asyncio_tests: + async_test.add_marker(session_scope_marker, append=False) + + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + +api_key = "My API Key" + + +@pytest.fixture(scope="session") +def client(request: FixtureRequest) -> Iterator[OpenAI]: + strict = getattr(request, "param", True) + if not isinstance(strict, bool): + raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") + + with OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=strict) as client: + yield client + + @pytest.fixture(scope="session") -def event_loop() -> Iterator[asyncio.AbstractEventLoop]: - loop = asyncio.new_event_loop() - yield loop - loop.close() +async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncOpenAI]: + strict = getattr(request, "param", True) + if not isinstance(strict, bool): + raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") + + async with AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=strict) as client: + yield client diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/lib/chat/__init__.py b/tests/lib/chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/lib/chat/_utils.py b/tests/lib/chat/_utils.py new file mode 100644 index 0000000000..af08db417c --- /dev/null +++ b/tests/lib/chat/_utils.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import inspect +from typing import Any, Iterable +from typing_extensions import TypeAlias + +import pytest +import pydantic + +from ...utils import rich_print_str + +ReprArgs: TypeAlias = "Iterable[tuple[str | None, Any]]" + + +def print_obj(obj: 
object, monkeypatch: pytest.MonkeyPatch) -> str: + """Pretty print an object to a string""" + + # monkeypatch pydantic model printing so that model fields + # are always printed in the same order so we can reliably + # use this for snapshot tests + original_repr = pydantic.BaseModel.__repr_args__ + + def __repr_args__(self: pydantic.BaseModel) -> ReprArgs: + return sorted(original_repr(self), key=lambda arg: arg[0] or arg) + + with monkeypatch.context() as m: + m.setattr(pydantic.BaseModel, "__repr_args__", __repr_args__) + + string = rich_print_str(obj) + + # we remove all `fn_name.<locals>.` occurrences + # so that we can share the same snapshots between + # pydantic v1 and pydantic v2 as their output for + # generic models differs, e.g. + # + # v2: `ParsedChatCompletion[test_parse_pydantic_model.<locals>.Location]` + # v1: `ParsedChatCompletion[Location]` + return clear_locals(string, stacklevel=2) + + + def get_caller_name(*, stacklevel: int = 1) -> str: + frame = inspect.currentframe() + assert frame is not None + + for i in range(stacklevel): + frame = frame.f_back + assert frame is not None, f"no {i}th frame" + + return frame.f_code.co_name + + + def clear_locals(string: str, *, stacklevel: int) -> str: + caller = get_caller_name(stacklevel=stacklevel + 1) + return string.replace(f"{caller}.<locals>.", "") diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py new file mode 100644 index 0000000000..62fdd34c0a --- /dev/null +++ b/tests/lib/chat/test_completions.py @@ -0,0 +1,1067 @@ +from __future__ import annotations + +import os +import json +from enum import Enum +from typing import Any, List, Callable, Optional, Awaitable +from typing_extensions import Literal, TypeVar + +import httpx +import pytest +from respx import MockRouter +from pydantic import Field, BaseModel +from inline_snapshot import snapshot + +import openai +from openai import OpenAI, AsyncOpenAI +from openai._utils import assert_signatures_in_sync +from openai._compat import PYDANTIC_V2 + +from ._utils import print_obj +from ...conftest import base_url +from ..schema_types.query import Query + +_T = TypeVar("_T") + +# all the snapshots in this file are auto-generated from the live API +# +# you can update them with +# +# `OPENAI_LIVE=1 pytest --inline-snapshot=fix` + + +@pytest.mark.respx(base_url=base_url) +def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvaueLEMLNYbT8YzpJxsmiQ6HSY", "object": "chat.completion", "created": 1727346142, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "I\'m unable to provide real-time weather updates. 
To get the current weather in San Francisco, I recommend checking a reliable weather website or app like the Weather Channel or a local news station.", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 14, "completion_tokens": 37, "total_tokens": 51, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_b40fb1c6fb"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[NoneType]( + choices=[ + ParsedChoice[NoneType]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I +recommend checking a reliable weather website or app like the Weather Channel or a local news station.", + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727346142, + id='chatcmpl-ABfvaueLEMLNYbT8YzpJxsmiQ6HSY', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_b40fb1c6fb', + usage=CompletionUsage( + completion_tokens=37, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=14, + prompt_tokens_details=None, + total_tokens=51 + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvbtVnTu5DeC4EFnRYj8mtfOM99", "object": "chat.completion", "created": 1727346143, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":65,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 14, "total_tokens": 93, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_5050236cbd"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[Location]( + choices=[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":65,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=65.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727346143, + id='chatcmpl-ABfvbtVnTu5DeC4EFnRYj8mtfOM99', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_5050236cbd', + usage=CompletionUsage( + completion_tokens=14, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=79, + prompt_tokens_details=None, + 
total_tokens=93 + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_optional_default( + client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch +) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Optional[Literal["c", "f"]] = None + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvcC8grKYsRkSoMp9CCAhbXAd0b", "object": "chat.completion", "created": 1727346144, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":65,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 88, "completion_tokens": 14, "total_tokens": 102, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_b40fb1c6fb"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[Location]( + choices=[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":65,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=65.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727346144, + id='chatcmpl-ABfvcC8grKYsRkSoMp9CCAhbXAd0b', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_b40fb1c6fb', + usage=CompletionUsage( + completion_tokens=14, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=88, + prompt_tokens_details=None, + total_tokens=102 + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_enum(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Color(Enum): + """The detected color""" + + RED = "red" + BLUE = "blue" + GREEN = "green" + + class ColorDetection(BaseModel): + color: Color + hex_color_code: str = Field(description="The hex color code of the detected color") + + if not PYDANTIC_V2: + ColorDetection.update_forward_refs(**locals()) # type: ignore + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "user", "content": "What color is a Coke can?"}, + ], + response_format=ColorDetection, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvjIatz0zrZu50gRbMtlp0asZpz", "object": "chat.completion", "created": 1727346151, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"color\\":\\"red\\",\\"hex_color_code\\":\\"#FF0000\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 109, "completion_tokens": 14, "total_tokens": 123, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_5050236cbd"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices[0], monkeypatch) == snapshot( + """\ +ParsedChoice[ColorDetection]( + 
finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[ColorDetection]( + annotations=None, + audio=None, + content='{"color":"red","hex_color_code":"#FF0000"}', + function_call=None, + parsed=ColorDetection(color=<Color.RED: 'red'>, hex_color_code='#FF0000'), + refusal=None, + role='assistant', + tool_calls=None + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_multiple_choices( + client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch +) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + n=3, + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvp8qzboW92q8ONDF4DPHlI7ckC", "object": "chat.completion", "created": 1727346157, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":64,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}, {"index": 1, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":65,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}, {"index": 2, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":63.0,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 44, "total_tokens": 123, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_b40fb1c6fb"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":64,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=64.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ), + ParsedChoice[Location]( + finish_reason='stop', + index=1, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":65,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=65.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ), + ParsedChoice[Location]( + finish_reason='stop', + index=2, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":63.0,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=63.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +@pytest.mark.skipif(not PYDANTIC_V2, reason="dataclasses only supported in v2") +def test_parse_pydantic_dataclass(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + from pydantic.dataclasses import dataclass + + @dataclass + class CalendarEvent: + name: str + date: str + participants: List[str] + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse(
model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "Extract the event information."}, + {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + ], + response_format=CalendarEvent, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvqhz4uUUWsw8Ohw2Mp9B4sKKV8", "object": "chat.completion", "created": 1727346158, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"name\\":\\"Science Fair\\",\\"date\\":\\"Friday\\",\\"participants\\":[\\"Alice\\",\\"Bob\\"]}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 92, "completion_tokens": 17, "total_tokens": 109, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_7568d46099"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[CalendarEvent]( + choices=[ + ParsedChoice[CalendarEvent]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[CalendarEvent]( + annotations=None, + audio=None, + content='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}', + function_call=None, + parsed=CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob']), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727346158, + id='chatcmpl-ABfvqhz4uUUWsw8Ohw2Mp9B4sKKV8', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_7568d46099', + usage=CompletionUsage( + completion_tokens=17, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=92, + prompt_tokens_details=None, + total_tokens=109 + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_pydantic_tool_model_all_types(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "look up all my orders in may of last year that were fulfilled but not delivered on time", + }, + ], + tools=[openai.pydantic_function_tool(Query)], + response_format=Query, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvtNiaTNUF6OymZUnEFc9lPq9p1", "object": "chat.completion", "created": 1727346161, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "tool_calls": [{"id": "call_NKpApJybW1MzOjZO2FzwYw0d", "type": "function", "function": {"name": "Query", "arguments": "{\\"name\\":\\"May 2022 Fulfilled Orders Not Delivered on Time\\",\\"table_name\\":\\"orders\\",\\"columns\\":[\\"id\\",\\"status\\",\\"expected_delivery_date\\",\\"delivered_at\\",\\"shipped_at\\",\\"ordered_at\\",\\"canceled_at\\"],\\"conditions\\":[{\\"column\\":\\"ordered_at\\",\\"operator\\":\\">=\\",\\"value\\":\\"2022-05-01\\"},{\\"column\\":\\"ordered_at\\",\\"operator\\":\\"<=\\",\\"value\\":\\"2022-05-31\\"},{\\"column\\":\\"status\\",\\"operator\\":\\"=\\",\\"value\\":\\"fulfilled\\"},{\\"column\\":\\"delivered_at\\",\\"operator\\":\\">\\",\\"value\\":{\\"column_name\\":\\"expected_delivery_date\\"}}],\\"order_by\\":\\"asc\\"}"}}], "refusal": null}, "logprobs": null, "finish_reason": "tool_calls"}], "usage": {"prompt_tokens": 512, "completion_tokens": 132, 
"total_tokens": 644, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_7568d46099"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices[0], monkeypatch) == snapshot( + """\ +ParsedChoice[Query]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Query]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"name":"May 2022 Fulfilled Orders Not Delivered on +Time","table_name":"orders","columns":["id","status","expected_delivery_date","delivered_at","shipped_at","ordered_at"," +canceled_at"],"conditions":[{"column":"ordered_at","operator":">=","value":"2022-05-01"},{"column":"ordered_at","operato +r":"<=","value":"2022-05-31"},{"column":"status","operator":"=","value":"fulfilled"},{"column":"delivered_at","operator" +:">","value":{"column_name":"expected_delivery_date"}}],"order_by":"asc"}', + name='Query', + parsed_arguments=Query( + columns=[ + <Column.id: 'id'>, + <Column.status: 'status'>, + <Column.expected_delivery_date: 'expected_delivery_date'>, + <Column.delivered_at: 'delivered_at'>, + <Column.shipped_at: 'shipped_at'>, + <Column.ordered_at: 'ordered_at'>, + <Column.canceled_at: 'canceled_at'> + ], + conditions=[ + Condition(column='ordered_at', operator=<Operator.ge: '>='>, value='2022-05-01'), + Condition(column='ordered_at', operator=<Operator.le: '<='>, value='2022-05-31'), + Condition(column='status', operator=<Operator.eq: '='>, value='fulfilled'), + Condition( + column='delivered_at', + operator=<Operator.gt: '>'>, + value=DynamicValue(column_name='expected_delivery_date') + ) + ], + name='May 2022 Fulfilled Orders Not Delivered on Time', + order_by=<OrderBy.asc: 'asc'>, + table_name=<Table.orders: 'orders'> + ) + ), + id='call_NKpApJybW1MzOjZO2FzwYw0d', + type='function' + ) + ] + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_max_tokens_reached(client: OpenAI, respx_mock: MockRouter) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + with pytest.raises(openai.LengthFinishReasonError): + _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + max_tokens=1, + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvvX7eB1KsfeZj8VcF3z7G7SbaA", "object": "chat.completion", "created": 1727346163, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"", "refusal": null}, "logprobs": null, "finish_reason": "length"}], "usage": {"prompt_tokens": 79, "completion_tokens": 1, "total_tokens": 80, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_7568d46099"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "How do I make anthrax?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvwoKVWPQj2UPlAcAKM7s40GsRx", "object": "chat.completion", "created": 1727346164, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "refusal": "I\'m very sorry, but I can\'t assist with that."}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 12, "total_tokens": 91, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": 
"fp_5050236cbd"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal="I'm very sorry, but I can't assist with that.", + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class GetWeatherArgs(BaseModel): + city: str + country: str + units: Literal["c", "f"] = "c" + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in Edinburgh?", + }, + ], + tools=[ + openai.pydantic_function_tool(GetWeatherArgs), + ], + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvx6Z4dchiW2nya1N8KMsHFrQRE", "object": "chat.completion", "created": 1727346165, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "tool_calls": [{"id": "call_Y6qJ7ofLgOrBnMD5WbVAeiRV", "type": "function", "function": {"name": "GetWeatherArgs", "arguments": "{\\"city\\":\\"Edinburgh\\",\\"country\\":\\"UK\\",\\"units\\":\\"c\\"}"}}], "refusal": null}, "logprobs": null, "finish_reason": "tool_calls"}], "usage": {"prompt_tokens": 76, "completion_tokens": 24, "total_tokens": 100, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_e45dabd248"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"Edinburgh","country":"UK","units":"c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='UK', units='c') + ), + id='call_Y6qJ7ofLgOrBnMD5WbVAeiRV', + type='function' + ) + ] + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class GetWeatherArgs(BaseModel): + """Get the temperature for the given country/city combo""" + + city: str + country: str + units: Literal["c", "f"] = "c" + + class GetStockPrice(BaseModel): + ticker: str + exchange: str + + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in Edinburgh?", + }, + { + "role": "user", + "content": "What's the price of AAPL?", + }, + ], + tools=[ + openai.pydantic_function_tool(GetWeatherArgs), + openai.pydantic_function_tool( + GetStockPrice, name="get_stock_price", description="Fetch the latest price for a given ticker" + ), + ], + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvyvfNWKcl7Ohqos4UFrmMs1v4C", "object": "chat.completion", "created": 1727346166, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "tool_calls": [{"id": 
"call_fdNz3vOBKYgOIpMdWotB9MjY", "type": "function", "function": {"name": "GetWeatherArgs", "arguments": "{\\"city\\": \\"Edinburgh\\", \\"country\\": \\"GB\\", \\"units\\": \\"c\\"}"}}, {"id": "call_h1DWI1POMJLb0KwIyQHWXD4p", "type": "function", "function": {"name": "get_stock_price", "arguments": "{\\"ticker\\": \\"AAPL\\", \\"exchange\\": \\"NASDAQ\\"}"}}], "refusal": null}, "logprobs": null, "finish_reason": "tool_calls"}], "usage": {"prompt_tokens": 149, "completion_tokens": 60, "total_tokens": 209, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_b40fb1c6fb"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city": "Edinburgh", "country": "GB", "units": "c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='GB', units='c') + ), + id='call_fdNz3vOBKYgOIpMdWotB9MjY', + type='function' + ), + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"ticker": "AAPL", "exchange": "NASDAQ"}', + name='get_stock_price', + parsed_arguments=GetStockPrice(exchange='NASDAQ', ticker='AAPL') + ), + id='call_h1DWI1POMJLb0KwIyQHWXD4p', + type='function' + ) + ] + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + completion = _make_snapshot_request( + lambda c: c.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string"}, + "state": {"type": "string"}, + }, + "required": [ + "city", + "state", + ], + "additionalProperties": False, + }, + "strict": True, + }, + } + ], + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABfvzdvCI6RaIkiEFNjqGXCSYnlzf", "object": "chat.completion", "created": 1727346167, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "tool_calls": [{"id": "call_CUdUoJpsWWVdxXntucvnol1M", "type": "function", "function": {"name": "get_weather", "arguments": "{\\"city\\":\\"San Francisco\\",\\"state\\":\\"CA\\"}"}}], "refusal": null}, "logprobs": null, "finish_reason": "tool_calls"}], "usage": {"prompt_tokens": 48, "completion_tokens": 19, "total_tokens": 67, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_5050236cbd"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(completion.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"San Francisco","state":"CA"}', + name='get_weather', + parsed_arguments={'city': 'San Francisco', 'state': 'CA'} + ), + 
id='call_CUdUoJpsWWVdxXntucvnol1M', + type='function' + ) + ] + ) + ) +] +""" + ) + + +def test_parse_non_strict_tools(client: OpenAI) -> None: + with pytest.raises( + ValueError, match="`get_weather` is not strict. Only `strict` function tools can be auto-parsed" + ): + client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": {}, + }, + } + ], + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_raw_response(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + response = _make_snapshot_request( + lambda c: c.beta.chat.completions.with_raw_response.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABrDYCa8W1w66eUxKDO8TQF1m6trT", "object": "chat.completion", "created": 1727389540, "model": "gpt-4o-2024-08-06", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":58,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 14, "total_tokens": 93, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_5050236cbd"}' + ), + mock_client=client, + respx_mock=respx_mock, + ) + assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse" + + completion = response.parse() + message = completion.choices[0].message + assert message.parsed is not None + assert isinstance(message.parsed.city, str) + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[Location]( + choices=[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":58,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=58.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727389540, + id='chatcmpl-ABrDYCa8W1w66eUxKDO8TQF1m6trT', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_5050236cbd', + usage=CompletionUsage( + completion_tokens=14, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=79, + prompt_tokens_details=None, + total_tokens=93 + ) +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +@pytest.mark.asyncio +async def test_async_parse_pydantic_raw_response( + async_client: AsyncOpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch +) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + response = await _make_async_snapshot_request( + lambda c: c.beta.chat.completions.with_raw_response.parse( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot( + '{"id": "chatcmpl-ABrDQWOiw0PK5JOsxl1D9ooeQgznq", "object": "chat.completion", "created": 1727389532, "model": "gpt-4o-2024-08-06", 
"choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\"city\\":\\"San Francisco\\",\\"temperature\\":65,\\"units\\":\\"f\\"}", "refusal": null}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 79, "completion_tokens": 14, "total_tokens": 93, "completion_tokens_details": {"reasoning_tokens": 0}}, "system_fingerprint": "fp_5050236cbd"}' + ), + mock_client=async_client, + respx_mock=respx_mock, + ) + assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse" + + completion = response.parse() + message = completion.choices[0].message + assert message.parsed is not None + assert isinstance(message.parsed.city, str) + assert print_obj(completion, monkeypatch) == snapshot( + """\ +ParsedChatCompletion[Location]( + choices=[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":65,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=65.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727389532, + id='chatcmpl-ABrDQWOiw0PK5JOsxl1D9ooeQgznq', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_5050236cbd', + usage=CompletionUsage( + completion_tokens=14, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=79, + prompt_tokens_details=None, + total_tokens=93 + ) +) +""" + ) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.chat.completions.create, + checking_client.beta.chat.completions.parse, + exclude_params={"response_format", "stream"}, + ) + + +def _make_snapshot_request( + func: Callable[[OpenAI], _T], + *, + content_snapshot: Any, + respx_mock: MockRouter, + mock_client: OpenAI, +) -> _T: + live = os.environ.get("OPENAI_LIVE") == "1" + if live: + + def _on_response(response: httpx.Response) -> None: + # update the content snapshot + assert json.dumps(json.loads(response.read())) == content_snapshot + + respx_mock.stop() + + client = OpenAI( + http_client=httpx.Client( + event_hooks={ + "response": [_on_response], + } + ) + ) + else: + respx_mock.post("/chat/completions").mock( + return_value=httpx.Response( + 200, + content=content_snapshot._old_value, + headers={"content-type": "application/json"}, + ) + ) + + client = mock_client + + result = func(client) + + if live: + client.close() + + return result + + +async def _make_async_snapshot_request( + func: Callable[[AsyncOpenAI], Awaitable[_T]], + *, + content_snapshot: Any, + respx_mock: MockRouter, + mock_client: AsyncOpenAI, +) -> _T: + live = os.environ.get("OPENAI_LIVE") == "1" + if live: + + async def _on_response(response: httpx.Response) -> None: + # update the content snapshot + assert json.dumps(json.loads(await response.aread())) == content_snapshot + + respx_mock.stop() + + client = AsyncOpenAI( + http_client=httpx.AsyncClient( + event_hooks={ + "response": [_on_response], + } + ) + ) + else: + respx_mock.post("/chat/completions").mock( + return_value=httpx.Response( + 200, + 
content=content_snapshot._old_value, + headers={"content-type": "application/json"}, + ) + ) + + client = mock_client + + result = await func(client) + + if live: + await client.close() + + return result diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py new file mode 100644 index 0000000000..5852c5a343 --- /dev/null +++ b/tests/lib/chat/test_completions_streaming.py @@ -0,0 +1,1184 @@ +from __future__ import annotations + +import os +from typing import Any, Generic, Callable, Iterator, cast, overload +from typing_extensions import Literal, TypeVar + +import rich +import httpx +import pytest +from respx import MockRouter +from pydantic import BaseModel +from inline_snapshot import external, snapshot, outsource + +import openai +from openai import OpenAI, AsyncOpenAI +from openai._utils import consume_sync_iterator, assert_signatures_in_sync +from openai._compat import model_copy +from openai.types.chat import ChatCompletionChunk +from openai.lib.streaming.chat import ( + ContentDoneEvent, + ChatCompletionStream, + ChatCompletionStreamEvent, + ChatCompletionStreamState, + ChatCompletionStreamManager, + ParsedChatCompletionSnapshot, +) +from openai.lib._parsing._completions import ResponseFormatT + +from ._utils import print_obj +from ...conftest import base_url + +_T = TypeVar("_T") + +# all the snapshots in this file are auto-generated from the live API +# +# you can update them with +# +# `OPENAI_LIVE=1 pytest --inline-snapshot=fix` + + +@pytest.mark.respx(base_url=base_url) +def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + ), + content_snapshot=snapshot(external("e2aad469b71d*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I +recommend checking a reliable weather website or a weather app.", + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( + """\ +ContentDoneEvent[NoneType]( + content="I'm unable to provide real-time weather updates. 
To get the current weather in San Francisco, I recommend +checking a reliable weather website or a weather app.", + parsed=None, + type='content.done' +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + done_snapshots: list[ParsedChatCompletionSnapshot] = [] + + def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStreamEvent[Location]) -> None: + if event.type == "content.done": + done_snapshots.append(model_copy(stream.current_completion_snapshot, deep=True)) + + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot(external("7e5ea4d12e7c*.bin")), + mock_client=client, + respx_mock=respx_mock, + on_event=on_event, + ) + + assert len(done_snapshots) == 1 + assert isinstance(done_snapshots[0].choices[0].message.parsed, Location) + + for event in reversed(listener.events): + if event.type == "content.delta": + data = cast(Any, event.parsed) + assert isinstance(data["city"], str), data + assert isinstance(data["temperature"], (int, float)), data + assert isinstance(data["units"], str), data + break + else: + rich.print(listener.events) + raise AssertionError("Did not find a `content.delta` event") + + assert print_obj(listener.stream.get_final_completion(), monkeypatch) == snapshot( + """\ +ParsedChatCompletion[Location]( + choices=[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":61,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=61.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) + ], + created=1727346169, + id='chatcmpl-ABfw1e5abtU8OwGr15vOreYVb2MiF', + model='gpt-4o-2024-08-06', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_5050236cbd', + usage=CompletionUsage( + completion_tokens=14, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=0, + rejected_prediction_tokens=None + ), + prompt_tokens=79, + prompt_tokens_details=None, + total_tokens=93 + ) +) +""" + ) + assert print_obj(listener.get_event_by_type("content.done"), monkeypatch) == snapshot( + """\ +ContentDoneEvent[Location]( + content='{"city":"San Francisco","temperature":61,"units":"f"}', + parsed=Location(city='San Francisco', temperature=61.0, units='f'), + type='content.done' +) +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_multiple_choices( + client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch +) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + n=3, + response_format=Location, + ), + content_snapshot=snapshot(external("a491adda08c3*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert [e.type for e in listener.events] == snapshot( 
+ [ + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.delta", + "chunk", + "content.done", + "chunk", + "content.done", + "chunk", + "content.done", + "chunk", + ] + ) + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":65,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=65.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ), + ParsedChoice[Location]( + finish_reason='stop', + index=1, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":61,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=61.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ), + ParsedChoice[Location]( + finish_reason='stop', + index=2, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content='{"city":"San Francisco","temperature":59,"units":"f"}', + function_call=None, + parsed=Location(city='San Francisco', temperature=59.0, units='f'), + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_max_tokens_reached(client: OpenAI, respx_mock: MockRouter) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + with pytest.raises(openai.LengthFinishReasonError): + _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + max_tokens=1, + response_format=Location, + ), + content_snapshot=snapshot(external("4cc50a6135d2*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_model_refusal(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: 
str + temperature: float + units: Literal["c", "f"] + + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "How do I make anthrax?", + }, + ], + response_format=Location, + ), + content_snapshot=snapshot(external("173417d55340*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.get_event_by_type("refusal.done"), monkeypatch) == snapshot("""\ +RefusalDoneEvent(refusal="I'm sorry, I can't assist with that request.", type='refusal.done') +""") + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal="I'm sorry, I can't assist with that request.", + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "Say foo", + }, + ], + logprobs=True, + ), + content_snapshot=snapshot(external("83b060bae42e*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj([e for e in listener.events if e.type.startswith("logprobs")], monkeypatch) == snapshot("""\ +[ + LogprobsContentDeltaEvent( + content=[ + ChatCompletionTokenLogprob(bytes=[70, 111, 111], logprob=-0.0025094282, token='Foo', top_logprobs=[]) + ], + snapshot=[ + ChatCompletionTokenLogprob(bytes=[70, 111, 111], logprob=-0.0025094282, token='Foo', top_logprobs=[]) + ], + type='logprobs.content.delta' + ), + LogprobsContentDeltaEvent( + content=[ChatCompletionTokenLogprob(bytes=[33], logprob=-0.26638845, token='!', top_logprobs=[])], + snapshot=[ + ChatCompletionTokenLogprob(bytes=[70, 111, 111], logprob=-0.0025094282, token='Foo', top_logprobs=[]), + ChatCompletionTokenLogprob(bytes=[33], logprob=-0.26638845, token='!', top_logprobs=[]) + ], + type='logprobs.content.delta' + ), + LogprobsContentDoneEvent( + content=[ + ChatCompletionTokenLogprob(bytes=[70, 111, 111], logprob=-0.0025094282, token='Foo', top_logprobs=[]), + ChatCompletionTokenLogprob(bytes=[33], logprob=-0.26638845, token='!', top_logprobs=[]) + ], + type='logprobs.content.done' + ) +] +""") + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ +[ + ParsedChoice[NoneType]( + finish_reason='stop', + index=0, + logprobs=ChoiceLogprobs( + content=[ + ChatCompletionTokenLogprob(bytes=[70, 111, 111], logprob=-0.0025094282, token='Foo', top_logprobs=[]), + ChatCompletionTokenLogprob(bytes=[33], logprob=-0.26638845, token='!', top_logprobs=[]) + ], + refusal=None + ), + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content='Foo!', + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""") + + +@pytest.mark.respx(base_url=base_url) +def test_refusal_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class Location(BaseModel): + city: str + temperature: float + units: Literal["c", "f"] + + listener = _make_stream_snapshot_request( + lambda c: 
c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "How do I make anthrax?", + }, + ], + logprobs=True, + response_format=Location, + ), + content_snapshot=snapshot(external("569c877e6942*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj([e.type for e in listener.events if e.type.startswith("logprobs")], monkeypatch) == snapshot("""\ +[ + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.delta', + 'logprobs.refusal.done' +] +""") + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot("""\ +[ + ParsedChoice[Location]( + finish_reason='stop', + index=0, + logprobs=ChoiceLogprobs( + content=None, + refusal=[ + ChatCompletionTokenLogprob(bytes=[73, 39, 109], logprob=-0.0012038043, token="I'm", top_logprobs=[]), + ChatCompletionTokenLogprob( + bytes=[32, 118, 101, 114, 121], + logprob=-0.8438816, + token=' very', + top_logprobs=[] + ), + ChatCompletionTokenLogprob( + bytes=[32, 115, 111, 114, 114, 121], + logprob=-3.4121115e-06, + token=' sorry', + top_logprobs=[] + ), + ChatCompletionTokenLogprob(bytes=[44], logprob=-3.3809047e-05, token=',', top_logprobs=[]), + ChatCompletionTokenLogprob( + bytes=[32, 98, 117, 116], + logprob=-0.038048144, + token=' but', + top_logprobs=[] + ), + ChatCompletionTokenLogprob(bytes=[32, 73], logprob=-0.0016109125, token=' I', top_logprobs=[]), + ChatCompletionTokenLogprob( + bytes=[32, 99, 97, 110, 39, 116], + logprob=-0.0073532974, + token=" can't", + top_logprobs=[] + ), + ChatCompletionTokenLogprob( + bytes=[32, 97, 115, 115, 105, 115, 116], + logprob=-0.0020837625, + token=' assist', + top_logprobs=[] + ), + ChatCompletionTokenLogprob( + bytes=[32, 119, 105, 116, 104], + logprob=-0.00318354, + token=' with', + top_logprobs=[] + ), + ChatCompletionTokenLogprob( + bytes=[32, 116, 104, 97, 116], + logprob=-0.0017186158, + token=' that', + top_logprobs=[] + ), + ChatCompletionTokenLogprob(bytes=[46], logprob=-0.57687104, token='.', top_logprobs=[]) + ] + ), + message=ParsedChatCompletionMessage[Location]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal="I'm very sorry, but I can't assist with that.", + role='assistant', + tool_calls=None + ) + ) +] +""") + + +@pytest.mark.respx(base_url=base_url) +def test_parse_pydantic_tool(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class GetWeatherArgs(BaseModel): + city: str + country: str + units: Literal["c", "f"] = "c" + + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in Edinburgh?", + }, + ], + tools=[ + openai.pydantic_function_tool(GetWeatherArgs), + ], + ), + content_snapshot=snapshot(external("c6aa7e397b71*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[object]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[object]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', 
+ tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"Edinburgh","country":"UK","units":"c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='UK', units='c') + ), + id='call_c91SqDXlYFuETYv8mUHzz6pp', + index=0, + type='function' + ) + ] + ) + ) +] +""" + ) + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"Edinburgh","country":"UK","units":"c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='UK', units='c') + ), + id='call_c91SqDXlYFuETYv8mUHzz6pp', + index=0, + type='function' + ) + ] + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_multiple_pydantic_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + class GetWeatherArgs(BaseModel): + """Get the temperature for the given country/city combo""" + + city: str + country: str + units: Literal["c", "f"] = "c" + + class GetStockPrice(BaseModel): + ticker: str + exchange: str + + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in Edinburgh?", + }, + { + "role": "user", + "content": "What's the price of AAPL?", + }, + ], + tools=[ + openai.pydantic_function_tool(GetWeatherArgs), + openai.pydantic_function_tool( + GetStockPrice, name="get_stock_price", description="Fetch the latest price for a given ticker" + ), + ], + ), + content_snapshot=snapshot(external("f82268f2fefd*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[object]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[object]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city": "Edinburgh", "country": "GB", "units": "c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='GB', units='c') + ), + id='call_JMW1whyEaYG438VE1OIflxA2', + index=0, + type='function' + ), + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"ticker": "AAPL", "exchange": "NASDAQ"}', + name='get_stock_price', + parsed_arguments=GetStockPrice(exchange='NASDAQ', ticker='AAPL') + ), + id='call_DNYTawLBoN8fj3KN6qU9N1Ou', + index=1, + type='function' + ) + ] + ) + ) +] +""" + ) + completion = listener.stream.get_final_completion() + assert print_obj(completion.choices[0].message.tool_calls, monkeypatch) == snapshot( + """\ +[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city": "Edinburgh", "country": "GB", "units": "c"}', + name='GetWeatherArgs', + parsed_arguments=GetWeatherArgs(city='Edinburgh', country='GB', units='c') + ), + id='call_JMW1whyEaYG438VE1OIflxA2', + index=0, + type='function' + ), + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"ticker": "AAPL", "exchange": 
"NASDAQ"}', + name='get_stock_price', + parsed_arguments=GetStockPrice(exchange='NASDAQ', ticker='AAPL') + ), + id='call_DNYTawLBoN8fj3KN6qU9N1Ou', + index=1, + type='function' + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string"}, + "state": {"type": "string"}, + }, + "required": [ + "city", + "state", + ], + "additionalProperties": False, + }, + "strict": True, + }, + } + ], + ), + content_snapshot=snapshot(external("a247c49c5fcd*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.stream.current_completion_snapshot.choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[object]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[object]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"San Francisco","state":"CA"}', + name='get_weather', + parsed_arguments={'city': 'San Francisco', 'state': 'CA'} + ), + id='call_CTf1nWJLqSeRgDqaCG27xZ74', + index=0, + type='function' + ) + ] + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF? 
Give me any JSON back", + }, + ], + response_format={"type": "json_object"}, + ), + content_snapshot=snapshot(external("d61558011839*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='stop', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content='\\n {\\n "location": "San Francisco, CA",\\n "weather": {\\n "temperature": "18°C",\\n +"condition": "Partly Cloudy",\\n "humidity": "72%",\\n "windSpeed": "15 km/h",\\n "windDirection": "NW"\\n +},\\n "forecast": [\\n {\\n "day": "Monday",\\n "high": "20°C",\\n "low": "14°C",\\n +"condition": "Sunny"\\n },\\n {\\n "day": "Tuesday",\\n "high": "19°C",\\n "low": "15°C",\\n +"condition": "Mostly Cloudy"\\n },\\n {\\n "day": "Wednesday",\\n "high": "18°C",\\n "low": +"14°C",\\n "condition": "Cloudy"\\n }\\n ]\\n }\\n', + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_allows_non_strict_tools_but_no_parsing( + client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch +) -> None: + listener = _make_stream_snapshot_request( + lambda c: c.beta.chat.completions.stream( + model="gpt-4o-2024-08-06", + messages=[{"role": "user", "content": "what's the weather in NYC?"}], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}, + }, + } + ], + ), + content_snapshot=snapshot(external("2018feb66ae1*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(listener.get_event_by_type("tool_calls.function.arguments.done"), monkeypatch) == snapshot("""\ +FunctionToolCallArgumentsDoneEvent( + arguments='{"city":"New York City"}', + index=0, + name='get_weather', + parsed_arguments=None, + type='tool_calls.function.arguments.done' +) +""") + + assert print_obj(listener.stream.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='tool_calls', + index=0, + logprobs=None, + message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content=None, + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=[ + ParsedFunctionToolCall( + function=ParsedFunction( + arguments='{"city":"New York City"}', + name='get_weather', + parsed_arguments=None + ), + id='call_4XzlGBLtUe9dy3GVNV4jhq7h', + index=0, + type='function' + ) + ] + ) + ) +] +""" + ) + + +@pytest.mark.respx(base_url=base_url) +def test_chat_completion_state_helper(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None: + state = ChatCompletionStreamState() + + def streamer(client: OpenAI) -> Iterator[ChatCompletionChunk]: + stream = client.chat.completions.create( + model="gpt-4o-2024-08-06", + messages=[ + { + "role": "user", + "content": "What's the weather like in SF?", + }, + ], + stream=True, + ) + for chunk in stream: + state.handle_chunk(chunk) + yield chunk + + _make_raw_stream_snapshot_request( + streamer, + content_snapshot=snapshot(external("e2aad469b71d*.bin")), + mock_client=client, + respx_mock=respx_mock, + ) + + assert print_obj(state.get_final_completion().choices, monkeypatch) == snapshot( + """\ +[ + ParsedChoice[NoneType]( + finish_reason='stop', + index=0, + logprobs=None, + 
message=ParsedChatCompletionMessage[NoneType]( + annotations=None, + audio=None, + content="I'm unable to provide real-time weather updates. To get the current weather in San Francisco, I +recommend checking a reliable weather website or a weather app.", + function_call=None, + parsed=None, + refusal=None, + role='assistant', + tool_calls=None + ) + ) +] +""" + ) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_stream_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.chat.completions.create, + checking_client.beta.chat.completions.stream, + exclude_params={"response_format", "stream"}, + ) + + +class StreamListener(Generic[ResponseFormatT]): + def __init__(self, stream: ChatCompletionStream[ResponseFormatT]) -> None: + self.stream = stream + self.events: list[ChatCompletionStreamEvent[ResponseFormatT]] = [] + + def __iter__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]: + for event in self.stream: + self.events.append(event) + yield event + + @overload + def get_event_by_type(self, event_type: Literal["content.done"]) -> ContentDoneEvent[ResponseFormatT] | None: ... + + @overload + def get_event_by_type(self, event_type: str) -> ChatCompletionStreamEvent[ResponseFormatT] | None: ... + + def get_event_by_type(self, event_type: str) -> ChatCompletionStreamEvent[ResponseFormatT] | None: + return next((e for e in self.events if e.type == event_type), None) + + +def _make_stream_snapshot_request( + func: Callable[[OpenAI], ChatCompletionStreamManager[ResponseFormatT]], + *, + content_snapshot: Any, + respx_mock: MockRouter, + mock_client: OpenAI, + on_event: Callable[[ChatCompletionStream[ResponseFormatT], ChatCompletionStreamEvent[ResponseFormatT]], Any] + | None = None, +) -> StreamListener[ResponseFormatT]: + live = os.environ.get("OPENAI_LIVE") == "1" + if live: + + def _on_response(response: httpx.Response) -> None: + # update the content snapshot + assert outsource(response.read()) == content_snapshot + + respx_mock.stop() + + client = OpenAI( + http_client=httpx.Client( + event_hooks={ + "response": [_on_response], + } + ) + ) + else: + respx_mock.post("/chat/completions").mock( + return_value=httpx.Response( + 200, + content=content_snapshot._old_value._load_value(), + headers={"content-type": "text/event-stream"}, + ) + ) + + client = mock_client + + with func(client) as stream: + listener = StreamListener(stream) + + for event in listener: + if on_event: + on_event(stream, event) + + if live: + client.close() + + return listener + + +def _make_raw_stream_snapshot_request( + func: Callable[[OpenAI], Iterator[ChatCompletionChunk]], + *, + content_snapshot: Any, + respx_mock: MockRouter, + mock_client: OpenAI, +) -> None: + live = os.environ.get("OPENAI_LIVE") == "1" + if live: + + def _on_response(response: httpx.Response) -> None: + # update the content snapshot + assert outsource(response.read()) == content_snapshot + + respx_mock.stop() + + client = OpenAI( + http_client=httpx.Client( + event_hooks={ + "response": [_on_response], + } + ) + ) + else: + respx_mock.post("/chat/completions").mock( + return_value=httpx.Response( + 200, + content=content_snapshot._old_value._load_value(), + headers={"content-type": "text/event-stream"}, + ) + ) + + client = mock_client + + stream = func(client) + consume_sync_iterator(stream) + + if live: + client.close() diff --git 
a/tests/lib/schema_types/query.py b/tests/lib/schema_types/query.py new file mode 100644 index 0000000000..03439fb17f --- /dev/null +++ b/tests/lib/schema_types/query.py @@ -0,0 +1,52 @@ +from enum import Enum +from typing import List, Union, Optional + +from pydantic import BaseModel + + +class Table(str, Enum): + orders = "orders" + customers = "customers" + products = "products" + + +class Column(str, Enum): + id = "id" + status = "status" + expected_delivery_date = "expected_delivery_date" + delivered_at = "delivered_at" + shipped_at = "shipped_at" + ordered_at = "ordered_at" + canceled_at = "canceled_at" + + +class Operator(str, Enum): + eq = "=" + gt = ">" + lt = "<" + le = "<=" + ge = ">=" + ne = "!=" + + +class OrderBy(str, Enum): + asc = "asc" + desc = "desc" + + +class DynamicValue(BaseModel): + column_name: str + + +class Condition(BaseModel): + column: str + operator: Operator + value: Union[str, int, DynamicValue] + + +class Query(BaseModel): + name: Optional[str] = None + table_name: Table + columns: List[Column] + conditions: List[Condition] + order_by: OrderBy diff --git a/tests/lib/test_assistants.py b/tests/lib/test_assistants.py new file mode 100644 index 0000000000..67d021ec35 --- /dev/null +++ b/tests/lib/test_assistants.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import pytest + +from openai import OpenAI, AsyncOpenAI +from openai._utils import assert_signatures_in_sync + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_create_and_run_poll_method_definition_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.beta.threads.create_and_run, + checking_client.beta.threads.create_and_run_poll, + exclude_params={"stream"}, + ) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_create_and_run_stream_method_definition_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.beta.threads.create_and_run, + checking_client.beta.threads.create_and_run_stream, + exclude_params={"stream"}, + ) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_run_stream_method_definition_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.beta.threads.runs.create, + checking_client.beta.threads.runs.stream, + exclude_params={"stream"}, + ) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_create_and_poll_method_definition_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + assert_signatures_in_sync( + checking_client.beta.threads.runs.create, + checking_client.beta.threads.runs.create_and_poll, + exclude_params={"stream"}, + ) diff --git a/tests/lib/test_audio.py b/tests/lib/test_audio.py new file mode 100644 index 0000000000..ff8dba4714 --- /dev/null +++ b/tests/lib/test_audio.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import sys +import inspect +import typing_extensions +from typing import get_args + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import evaluate_forwardref +from 
openai._utils import assert_signatures_in_sync +from openai._compat import is_literal_type +from openai._utils._typing import is_union_type +from openai.types.audio_response_format import AudioResponseFormat + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_translation_create_overloads_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + fn = checking_client.audio.translations.create + overload_response_formats: set[str] = set() + + for i, overload in enumerate(typing_extensions.get_overloads(fn)): + assert_signatures_in_sync( + fn, + overload, + exclude_params={"response_format", "stream"}, + description=f" for overload {i}", + ) + + sig = inspect.signature(overload) + typ = evaluate_forwardref( + sig.parameters["response_format"].annotation, + globalns=sys.modules[fn.__module__].__dict__, + ) + if is_union_type(typ): + for arg in get_args(typ): + if not is_literal_type(arg): + continue + + overload_response_formats.update(get_args(arg)) + elif is_literal_type(typ): + overload_response_formats.update(get_args(typ)) + + src_response_formats: set[str] = set(get_args(AudioResponseFormat)) + diff = src_response_formats.difference(overload_response_formats) + assert len(diff) == 0, f"some response format options don't have overloads" + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +def test_transcription_create_overloads_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + checking_client: OpenAI | AsyncOpenAI = client if sync else async_client + + fn = checking_client.audio.transcriptions.create + overload_response_formats: set[str] = set() + + for i, overload in enumerate(typing_extensions.get_overloads(fn)): + assert_signatures_in_sync( + fn, + overload, + exclude_params={"response_format", "stream"}, + description=f" for overload {i}", + ) + + sig = inspect.signature(overload) + typ = evaluate_forwardref( + sig.parameters["response_format"].annotation, + globalns=sys.modules[fn.__module__].__dict__, + ) + if is_union_type(typ): + for arg in get_args(typ): + if not is_literal_type(arg): + continue + + overload_response_formats.update(get_args(arg)) + elif is_literal_type(typ): + overload_response_formats.update(get_args(typ)) + + src_response_formats: set[str] = set(get_args(AudioResponseFormat)) + diff = src_response_formats.difference(overload_response_formats) + assert len(diff) == 0, f"some response format options don't have overloads" diff --git a/tests/lib/test_azure.py b/tests/lib/test_azure.py index 9360b2925a..52c24eba27 100644 --- a/tests/lib/test_azure.py +++ b/tests/lib/test_azure.py @@ -1,8 +1,14 @@ -from typing import Union -from typing_extensions import Literal +from __future__ import annotations +import logging +from typing import Union, cast +from typing_extensions import Literal, Protocol + +import httpx import pytest +from respx import MockRouter +from openai._utils import SensitiveHeadersFilter, is_dict from openai._models import FinalRequestOptions from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI @@ -22,6 +28,10 @@ ) +class MockRequestCall(Protocol): + request: httpx.Request + + @pytest.mark.parametrize("client", [sync_client, async_client]) def test_implicit_deployment_path(client: Client) -> None: req = client._build_request( @@ -64,3 +74,731 @@ def test_client_copying_override_options(client: Client) -> None: api_version="2022-05-01", ) assert copied._custom_query == 
{"api-version": "2022-05-01"} + + +@pytest.mark.respx() +def test_client_token_provider_refresh_sync(respx_mock: MockRouter) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-02-01" + ).mock( + side_effect=[ + httpx.Response(500, json={"error": "server error"}), + httpx.Response(200, json={"foo": "bar"}), + ] + ) + + counter = 0 + + def token_provider() -> str: + nonlocal counter + + counter += 1 + + if counter == 1: + return "first" + + return "second" + + client = AzureOpenAI( + api_version="2024-02-01", + azure_ad_token_provider=token_provider, + azure_endpoint="https://example-resource.azure.openai.com/", + ) + client.chat.completions.create(messages=[], model="gpt-4") + + calls = cast("list[MockRequestCall]", respx_mock.calls) + + assert len(calls) == 2 + + assert calls[0].request.headers.get("Authorization") == "Bearer first" + assert calls[1].request.headers.get("Authorization") == "Bearer second" + + +@pytest.mark.asyncio +@pytest.mark.respx() +async def test_client_token_provider_refresh_async(respx_mock: MockRouter) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-02-01" + ).mock( + side_effect=[ + httpx.Response(500, json={"error": "server error"}), + httpx.Response(200, json={"foo": "bar"}), + ] + ) + + counter = 0 + + def token_provider() -> str: + nonlocal counter + + counter += 1 + + if counter == 1: + return "first" + + return "second" + + client = AsyncAzureOpenAI( + api_version="2024-02-01", + azure_ad_token_provider=token_provider, + azure_endpoint="https://example-resource.azure.openai.com/", + ) + + await client.chat.completions.create(messages=[], model="gpt-4") + + calls = cast("list[MockRequestCall]", respx_mock.calls) + + assert len(calls) == 2 + + assert calls[0].request.headers.get("Authorization") == "Bearer first" + assert calls[1].request.headers.get("Authorization") == "Bearer second" + + +class TestAzureLogging: + @pytest.fixture(autouse=True) + def logger_with_filter(self) -> logging.Logger: + logger = logging.getLogger("openai") + logger.setLevel(logging.DEBUG) + logger.addFilter(SensitiveHeadersFilter()) + return logger + + @pytest.mark.respx() + def test_azure_api_key_redacted(self, respx_mock: MockRouter, caplog: pytest.LogCaptureFixture) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-06-01" + ).mock(return_value=httpx.Response(200, json={"model": "gpt-4"})) + + client = AzureOpenAI( + api_version="2024-06-01", + api_key="example_api_key", + azure_endpoint="https://example-resource.azure.openai.com/", + ) + + with caplog.at_level(logging.DEBUG): + client.chat.completions.create(messages=[], model="gpt-4") + + for record in caplog.records: + if is_dict(record.args) and record.args.get("headers") and is_dict(record.args["headers"]): + assert record.args["headers"]["api-key"] == "" + + @pytest.mark.respx() + def test_azure_bearer_token_redacted(self, respx_mock: MockRouter, caplog: pytest.LogCaptureFixture) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-06-01" + ).mock(return_value=httpx.Response(200, json={"model": "gpt-4"})) + + client = AzureOpenAI( + api_version="2024-06-01", + azure_ad_token="example_token", + 
azure_endpoint="https://example-resource.azure.openai.com/", + ) + + with caplog.at_level(logging.DEBUG): + client.chat.completions.create(messages=[], model="gpt-4") + + for record in caplog.records: + if is_dict(record.args) and record.args.get("headers") and is_dict(record.args["headers"]): + assert record.args["headers"]["Authorization"] == "" + + @pytest.mark.asyncio + @pytest.mark.respx() + async def test_azure_api_key_redacted_async(self, respx_mock: MockRouter, caplog: pytest.LogCaptureFixture) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-06-01" + ).mock(return_value=httpx.Response(200, json={"model": "gpt-4"})) + + client = AsyncAzureOpenAI( + api_version="2024-06-01", + api_key="example_api_key", + azure_endpoint="https://example-resource.azure.openai.com/", + ) + + with caplog.at_level(logging.DEBUG): + await client.chat.completions.create(messages=[], model="gpt-4") + + for record in caplog.records: + if is_dict(record.args) and record.args.get("headers") and is_dict(record.args["headers"]): + assert record.args["headers"]["api-key"] == "" + + @pytest.mark.asyncio + @pytest.mark.respx() + async def test_azure_bearer_token_redacted_async( + self, respx_mock: MockRouter, caplog: pytest.LogCaptureFixture + ) -> None: + respx_mock.post( + "https://example-resource.azure.openai.com/openai/deployments/gpt-4/chat/completions?api-version=2024-06-01" + ).mock(return_value=httpx.Response(200, json={"model": "gpt-4"})) + + client = AsyncAzureOpenAI( + api_version="2024-06-01", + azure_ad_token="example_token", + azure_endpoint="https://example-resource.azure.openai.com/", + ) + + with caplog.at_level(logging.DEBUG): + await client.chat.completions.create(messages=[], model="gpt-4") + + for record in caplog.records: + if is_dict(record.args) and record.args.get("headers") and is_dict(record.args["headers"]): + assert record.args["headers"]["Authorization"] == "" + + +@pytest.mark.parametrize( + "client,base_url,api,json_data,expected", + [ + # Deployment-based endpoints + # AzureOpenAI: No deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + # AzureOpenAI: Deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/chat/completions?api-version=2024-02-01", + ), + # AzureOpenAI: "deployments" in the DNS name + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.example-resource.azure.openai.com/", + ), + "https://deployments.example-resource.azure.openai.com/openai/", + "/chat/completions", + {"model": "deployment-body"}, + 
"https://deployments.example-resource.azure.openai.com/openai/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + # AzureOpenAI: Deployment called deployments + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployments/chat/completions?api-version=2024-02-01", + ), + # AzureOpenAI: base_url and azure_deployment specified; ignored b/c not supported + ( + AzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="deployment-client", + ), + "https://example.azure-api.net/PTU/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example.azure-api.net/PTU/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: No deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: Deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/chat/completions?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: "deployments" in the DNS name + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.example-resource.azure.openai.com/", + ), + "https://deployments.example-resource.azure.openai.com/openai/", + "/chat/completions", + {"model": "deployment-body"}, + "https://deployments.example-resource.azure.openai.com/openai/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: Deployment called deployments + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + "/chat/completions", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/deployments/deployments/chat/completions?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: base_url and azure_deployment specified; azure_deployment ignored b/c not supported + ( + AsyncAzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="deployment-client", + ), + "https://example.azure-api.net/PTU/", + "/chat/completions", + {"model": 
"deployment-body"}, + "https://example.azure-api.net/PTU/deployments/deployment-body/chat/completions?api-version=2024-02-01", + ), + ], +) +def test_prepare_url_deployment_endpoint( + client: Client, base_url: str, api: str, json_data: dict[str, str], expected: str +) -> None: + req = client._build_request( + FinalRequestOptions.construct( + method="post", + url=api, + json_data=json_data, + ) + ) + assert req.url == expected + assert client.base_url == base_url + + +@pytest.mark.parametrize( + "client,base_url,api,json_data,expected", + [ + # Non-deployment endpoints + # AzureOpenAI: No deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AzureOpenAI: No deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/assistants", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/assistants?api-version=2024-02-01", + ), + # AzureOpenAI: Deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AzureOpenAI: Deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/assistants", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/assistants?api-version=2024-02-01", + ), + # AzureOpenAI: "deployments" in the DNS name + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.example-resource.azure.openai.com/", + ), + "https://deployments.example-resource.azure.openai.com/openai/", + "/models", + {}, + "https://deployments.example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AzureOpenAI: Deployment called "deployments" + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AzureOpenAI: base_url and azure_deployment specified; azure_deployment ignored b/c not supported + ( + AzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="deployment-client", + ), + "https://example.azure-api.net/PTU/", + "/models", + {}, + 
"https://example.azure-api.net/PTU/models?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: No deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: No deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + "/assistants", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/assistants?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: Deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: Deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + "/assistants", + {"model": "deployment-body"}, + "https://example-resource.azure.openai.com/openai/assistants?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: "deployments" in the DNS name + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.example-resource.azure.openai.com/", + ), + "https://deployments.example-resource.azure.openai.com/openai/", + "/models", + {}, + "https://deployments.example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: Deployment called "deployments" + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + "/models", + {}, + "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01", + ), + # AsyncAzureOpenAI: base_url and azure_deployment specified; azure_deployment ignored b/c not supported + ( + AsyncAzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="deployment-client", + ), + "https://example.azure-api.net/PTU/", + "/models", + {}, + "https://example.azure-api.net/PTU/models?api-version=2024-02-01", + ), + ], +) +def test_prepare_url_nondeployment_endpoint( + client: Client, base_url: str, api: str, json_data: dict[str, str], expected: str +) -> None: + req = client._build_request( + FinalRequestOptions.construct( + method="post", + url=api, + json_data=json_data, + ) + ) + assert req.url == expected + assert client.base_url == base_url + + +@pytest.mark.parametrize( + 
"client,base_url,json_data,expected", + [ + # Realtime endpoint + # AzureOpenAI: No deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AzureOpenAI: Deployment specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-client", + ), + # AzureOpenAI: "deployments" in the DNS name + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.azure.openai.com/", + ), + "https://deployments.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://deployments.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AzureOpenAI: Deployment called "deployments" + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployments", + ), + # AzureOpenAI: base_url and azure_deployment specified; azure_deployment ignored b/c not supported + ( + AzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="my-deployment", + ), + "https://example.azure-api.net/PTU/", + {"model": "deployment-body"}, + "wss://example.azure-api.net/PTU/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AzureOpenAI: websocket_base_url specified + ( + AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + websocket_base_url="wss://example-resource.azure.openai.com/base", + ), + "https://example-resource.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/base/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + ], +) +def test_prepare_url_realtime(client: AzureOpenAI, base_url: str, json_data: dict[str, str], expected: str) -> None: + url, _ = client._configure_realtime(json_data["model"], {}) + assert str(url) == expected + assert client.base_url == base_url + + +@pytest.mark.parametrize( + "client,base_url,json_data,expected", + [ + # AsyncAzureOpenAI: No deployment specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + ), + "https://example-resource.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AsyncAzureOpenAI: Deployment specified + ( 
+ AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployment-client", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployment-client/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-client", + ), + # AsyncAzureOpenAI: "deployments" in the DNS name + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://deployments.azure.openai.com/", + ), + "https://deployments.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://deployments.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AsyncAzureOpenAI: Deployment called "deployments" + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="deployments", + ), + "https://example-resource.azure.openai.com/openai/deployments/deployments/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/openai/realtime?api-version=2024-02-01&deployment=deployments", + ), + # AsyncAzureOpenAI: base_url and azure_deployment specified; azure_deployment ignored b/c not supported + ( + AsyncAzureOpenAI( # type: ignore + api_version="2024-02-01", + api_key="example API key", + base_url="https://example.azure-api.net/PTU/", + azure_deployment="deployment-client", + ), + "https://example.azure-api.net/PTU/", + {"model": "deployment-body"}, + "wss://example.azure-api.net/PTU/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + # AsyncAzureOpenAI: websocket_base_url specified + ( + AsyncAzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + websocket_base_url="wss://example-resource.azure.openai.com/base", + ), + "https://example-resource.azure.openai.com/openai/", + {"model": "deployment-body"}, + "wss://example-resource.azure.openai.com/base/realtime?api-version=2024-02-01&deployment=deployment-body", + ), + ], +) +async def test_prepare_url_realtime_async( + client: AsyncAzureOpenAI, base_url: str, json_data: dict[str, str], expected: str +) -> None: + url, _ = await client._configure_realtime(json_data["model"], {}) + assert str(url) == expected + assert client.base_url == base_url + + +def test_client_sets_base_url(/service/client: Client) -> None: + client = AzureOpenAI( + api_version="2024-02-01", + api_key="example API key", + azure_endpoint="https://example-resource.azure.openai.com/", + azure_deployment="my-deployment", + ) + assert client.base_url == "https://example-resource.azure.openai.com/openai/deployments/my-deployment/" + + # (not recommended) user sets base_url to target different deployment + client.base_url = "https://example-resource.azure.openai.com/openai/deployments/different-deployment/" + req = client._build_request( + FinalRequestOptions.construct( + method="post", + url="/chat/completions", + json_data={"model": "placeholder"}, + ) + ) + assert ( + req.url + == "https://example-resource.azure.openai.com/openai/deployments/different-deployment/chat/completions?api-version=2024-02-01" + ) + req = client._build_request( + FinalRequestOptions.construct( + method="post", + 
url="/models", + json_data={}, + ) + ) + assert req.url == "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01" + + # (not recommended) user sets base_url to remove deployment + client.base_url = "https://example-resource.azure.openai.com/openai/" + req = client._build_request( + FinalRequestOptions.construct( + method="post", + url="/chat/completions", + json_data={"model": "deployment"}, + ) + ) + assert ( + req.url + == "https://example-resource.azure.openai.com/openai/deployments/deployment/chat/completions?api-version=2024-02-01" + ) + req = client._build_request( + FinalRequestOptions.construct( + method="post", + url="/models", + json_data={}, + ) + ) + assert req.url == "https://example-resource.azure.openai.com/openai/models?api-version=2024-02-01" diff --git a/tests/lib/test_old_api.py b/tests/lib/test_old_api.py new file mode 100644 index 0000000000..bdb2a5398d --- /dev/null +++ b/tests/lib/test_old_api.py @@ -0,0 +1,17 @@ +import pytest + +import openai +from openai.lib._old_api import APIRemovedInV1 + + +def test_basic_attribute_access_works() -> None: + for attr in dir(openai): + getattr(openai, attr) + + +def test_helpful_error_is_raised() -> None: + with pytest.raises(APIRemovedInV1): + openai.Completion.create() # type: ignore + + with pytest.raises(APIRemovedInV1): + openai.ChatCompletion.create() # type: ignore diff --git a/tests/lib/test_pydantic.py b/tests/lib/test_pydantic.py new file mode 100644 index 0000000000..7e128b70c0 --- /dev/null +++ b/tests/lib/test_pydantic.py @@ -0,0 +1,411 @@ +from __future__ import annotations + +from enum import Enum + +from pydantic import Field, BaseModel +from inline_snapshot import snapshot + +import openai +from openai._compat import PYDANTIC_V2 +from openai.lib._pydantic import to_strict_json_schema + +from .schema_types.query import Query + + +def test_most_types() -> None: + if PYDANTIC_V2: + assert openai.pydantic_function_tool(Query)["function"] == snapshot( + { + "name": "Query", + "strict": True, + "parameters": { + "$defs": { + "Column": { + "enum": [ + "id", + "status", + "expected_delivery_date", + "delivered_at", + "shipped_at", + "ordered_at", + "canceled_at", + ], + "title": "Column", + "type": "string", + }, + "Condition": { + "properties": { + "column": {"title": "Column", "type": "string"}, + "operator": {"$ref": "#/$defs/Operator"}, + "value": { + "anyOf": [ + {"type": "string"}, + {"type": "integer"}, + {"$ref": "#/$defs/DynamicValue"}, + ], + "title": "Value", + }, + }, + "required": ["column", "operator", "value"], + "title": "Condition", + "type": "object", + "additionalProperties": False, + }, + "DynamicValue": { + "properties": {"column_name": {"title": "Column Name", "type": "string"}}, + "required": ["column_name"], + "title": "DynamicValue", + "type": "object", + "additionalProperties": False, + }, + "Operator": {"enum": ["=", ">", "<", "<=", ">=", "!="], "title": "Operator", "type": "string"}, + "OrderBy": {"enum": ["asc", "desc"], "title": "OrderBy", "type": "string"}, + "Table": {"enum": ["orders", "customers", "products"], "title": "Table", "type": "string"}, + }, + "properties": { + "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "title": "Name"}, + "table_name": {"$ref": "#/$defs/Table"}, + "columns": { + "items": {"$ref": "#/$defs/Column"}, + "title": "Columns", + "type": "array", + }, + "conditions": { + "items": {"$ref": "#/$defs/Condition"}, + "title": "Conditions", + "type": "array", + }, + "order_by": {"$ref": 
"#/$defs/OrderBy"}, + }, + "required": ["name", "table_name", "columns", "conditions", "order_by"], + "title": "Query", + "type": "object", + "additionalProperties": False, + }, + } + ) + else: + assert openai.pydantic_function_tool(Query)["function"] == snapshot( + { + "name": "Query", + "strict": True, + "parameters": { + "title": "Query", + "type": "object", + "properties": { + "name": {"title": "Name", "type": "string"}, + "table_name": {"$ref": "#/definitions/Table"}, + "columns": {"type": "array", "items": {"$ref": "#/definitions/Column"}}, + "conditions": { + "title": "Conditions", + "type": "array", + "items": {"$ref": "#/definitions/Condition"}, + }, + "order_by": {"$ref": "#/definitions/OrderBy"}, + }, + "required": ["name", "table_name", "columns", "conditions", "order_by"], + "definitions": { + "Table": { + "title": "Table", + "description": "An enumeration.", + "enum": ["orders", "customers", "products"], + "type": "string", + }, + "Column": { + "title": "Column", + "description": "An enumeration.", + "enum": [ + "id", + "status", + "expected_delivery_date", + "delivered_at", + "shipped_at", + "ordered_at", + "canceled_at", + ], + "type": "string", + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": ["=", ">", "<", "<=", ">=", "!="], + "type": "string", + }, + "DynamicValue": { + "title": "DynamicValue", + "type": "object", + "properties": {"column_name": {"title": "Column Name", "type": "string"}}, + "required": ["column_name"], + "additionalProperties": False, + }, + "Condition": { + "title": "Condition", + "type": "object", + "properties": { + "column": {"title": "Column", "type": "string"}, + "operator": {"$ref": "#/definitions/Operator"}, + "value": { + "title": "Value", + "anyOf": [ + {"type": "string"}, + {"type": "integer"}, + {"$ref": "#/definitions/DynamicValue"}, + ], + }, + }, + "required": ["column", "operator", "value"], + "additionalProperties": False, + }, + "OrderBy": { + "title": "OrderBy", + "description": "An enumeration.", + "enum": ["asc", "desc"], + "type": "string", + }, + }, + "additionalProperties": False, + }, + } + ) + + +class Color(Enum): + RED = "red" + BLUE = "blue" + GREEN = "green" + + +class ColorDetection(BaseModel): + color: Color = Field(description="The detected color") + hex_color_code: str = Field(description="The hex color code of the detected color") + + +def test_enums() -> None: + if PYDANTIC_V2: + assert openai.pydantic_function_tool(ColorDetection)["function"] == snapshot( + { + "name": "ColorDetection", + "strict": True, + "parameters": { + "$defs": {"Color": {"enum": ["red", "blue", "green"], "title": "Color", "type": "string"}}, + "properties": { + "color": { + "description": "The detected color", + "enum": ["red", "blue", "green"], + "title": "Color", + "type": "string", + }, + "hex_color_code": { + "description": "The hex color code of the detected color", + "title": "Hex Color Code", + "type": "string", + }, + }, + "required": ["color", "hex_color_code"], + "title": "ColorDetection", + "type": "object", + "additionalProperties": False, + }, + } + ) + else: + assert openai.pydantic_function_tool(ColorDetection)["function"] == snapshot( + { + "name": "ColorDetection", + "strict": True, + "parameters": { + "properties": { + "color": { + "description": "The detected color", + "title": "Color", + "enum": ["red", "blue", "green"], + }, + "hex_color_code": { + "description": "The hex color code of the detected color", + "title": "Hex Color Code", + "type": "string", + }, + }, + "required": 
["color", "hex_color_code"], + "title": "ColorDetection", + "definitions": { + "Color": {"title": "Color", "description": "An enumeration.", "enum": ["red", "blue", "green"]} + }, + "type": "object", + "additionalProperties": False, + }, + } + ) + + +class Star(BaseModel): + name: str = Field(description="The name of the star.") + + +class Galaxy(BaseModel): + name: str = Field(description="The name of the galaxy.") + largest_star: Star = Field(description="The largest star in the galaxy.") + + +class Universe(BaseModel): + name: str = Field(description="The name of the universe.") + galaxy: Galaxy = Field(description="A galaxy in the universe.") + + +def test_nested_inline_ref_expansion() -> None: + if PYDANTIC_V2: + assert to_strict_json_schema(Universe) == snapshot( + { + "title": "Universe", + "type": "object", + "$defs": { + "Star": { + "title": "Star", + "type": "object", + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the star.", + } + }, + "required": ["name"], + "additionalProperties": False, + }, + "Galaxy": { + "title": "Galaxy", + "type": "object", + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the galaxy.", + }, + "largest_star": { + "title": "Star", + "type": "object", + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the star.", + } + }, + "required": ["name"], + "description": "The largest star in the galaxy.", + "additionalProperties": False, + }, + }, + "required": ["name", "largest_star"], + "additionalProperties": False, + }, + }, + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the universe.", + }, + "galaxy": { + "title": "Galaxy", + "type": "object", + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the galaxy.", + }, + "largest_star": { + "title": "Star", + "type": "object", + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the star.", + } + }, + "required": ["name"], + "description": "The largest star in the galaxy.", + "additionalProperties": False, + }, + }, + "required": ["name", "largest_star"], + "description": "A galaxy in the universe.", + "additionalProperties": False, + }, + }, + "required": ["name", "galaxy"], + "additionalProperties": False, + } + ) + else: + assert to_strict_json_schema(Universe) == snapshot( + { + "title": "Universe", + "type": "object", + "definitions": { + "Star": { + "title": "Star", + "type": "object", + "properties": { + "name": {"title": "Name", "description": "The name of the star.", "type": "string"} + }, + "required": ["name"], + "additionalProperties": False, + }, + "Galaxy": { + "title": "Galaxy", + "type": "object", + "properties": { + "name": {"title": "Name", "description": "The name of the galaxy.", "type": "string"}, + "largest_star": { + "title": "Largest Star", + "description": "The largest star in the galaxy.", + "type": "object", + "properties": { + "name": {"title": "Name", "description": "The name of the star.", "type": "string"} + }, + "required": ["name"], + "additionalProperties": False, + }, + }, + "required": ["name", "largest_star"], + "additionalProperties": False, + }, + }, + "properties": { + "name": { + "title": "Name", + "description": "The name of the universe.", + "type": "string", + }, + "galaxy": { + "title": "Galaxy", + "description": "A galaxy in the universe.", + "type": "object", + "properties": { + 
"name": { + "title": "Name", + "description": "The name of the galaxy.", + "type": "string", + }, + "largest_star": { + "title": "Largest Star", + "description": "The largest star in the galaxy.", + "type": "object", + "properties": { + "name": {"title": "Name", "description": "The name of the star.", "type": "string"} + }, + "required": ["name"], + "additionalProperties": False, + }, + }, + "required": ["name", "largest_star"], + "additionalProperties": False, + }, + }, + "required": ["name", "galaxy"], + "additionalProperties": False, + } + ) diff --git a/tests/sample_file.txt b/tests/sample_file.txt new file mode 100644 index 0000000000..af5626b4a1 --- /dev/null +++ b/tests/sample_file.txt @@ -0,0 +1 @@ +Hello, world! diff --git a/tests/test_client.py b/tests/test_client.py index c5dbfe4bfe..616255af3c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,13 +1,20 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations +import gc import os +import sys import json +import time import asyncio import inspect -from typing import Any, Dict, Union, cast +import subprocess +import tracemalloc +from typing import Any, Union, cast +from textwrap import dedent from unittest import mock +from typing_extensions import Literal import httpx import pytest @@ -15,16 +22,14 @@ from pydantic import ValidationError from openai import OpenAI, AsyncOpenAI, APIResponseValidationError -from openai._client import OpenAI, AsyncOpenAI +from openai._types import Omit +from openai._utils import maybe_transform from openai._models import BaseModel, FinalRequestOptions +from openai._constants import RAW_RESPONSE_HEADER from openai._streaming import Stream, AsyncStream -from openai._exceptions import APIResponseValidationError -from openai._base_client import ( - DEFAULT_TIMEOUT, - HTTPX_DEFAULT_TIMEOUT, - BaseClient, - make_request_options, -) +from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError +from openai._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options +from openai.types.chat.completion_create_params import CompletionCreateParamsNonStreaming from .utils import update_env @@ -38,6 +43,18 @@ def _get_params(client: BaseClient[Any, Any]) -> dict[str, str]: return dict(url.params) +def _low_retry_timeout(*_args: Any, **_kwargs: Any) -> float: + return 0.1 + + +def _get_open_connections(client: OpenAI | AsyncOpenAI) -> int: + transport = client._client._transport + assert isinstance(transport, httpx.HTTPTransport) or isinstance(transport, httpx.AsyncHTTPTransport) + + pool = transport._pool + return len(pool._requests) + + class TestOpenAI: client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) @@ -171,6 +188,68 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + def test_copy_build_request(self) -> None: + options = FinalRequestOptions(method="get", url="/foo") + + def build_request(options: FinalRequestOptions) -> None: + client = self.client.copy() + client._build_request(options) + + # ensure that the machinery is warmed up before tracing starts. 
+ build_request(options) + gc.collect() + + tracemalloc.start(1000) + + snapshot_before = tracemalloc.take_snapshot() + + ITERATIONS = 10 + for _ in range(ITERATIONS): + build_request(options) + + gc.collect() + snapshot_after = tracemalloc.take_snapshot() + + tracemalloc.stop() + + def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None: + if diff.count == 0: + # Avoid false positives by considering only leaks (i.e. allocations that persist). + return + + if diff.count % ITERATIONS != 0: + # Avoid false positives by considering only leaks that appear per iteration. + return + + for frame in diff.traceback: + if any( + frame.filename.endswith(fragment) + for fragment in [ + # to_raw_response_wrapper leaks through the @functools.wraps() decorator. + # + # removing the decorator fixes the leak for reasons we don't understand. + "openai/_legacy_response.py", + "openai/_response.py", + # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason. + "openai/_compat.py", + # Standard library leaks we don't care about. + "/logging/__init__.py", + ] + ): + return + + leaks.append(diff) + + leaks: list[tracemalloc.StatisticDiff] = [] + for diff in snapshot_after.compare_to(snapshot_before, "traceback"): + add_leak(leaks, diff) + if leaks: + for leak in leaks: + print("MEMORY LEAK:", leak) + for frame in leak.traceback: + print(frame) + raise AssertionError() + def test_request_timeout(self) -> None: request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -220,6 +299,16 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + async def test_invalid_http_client(self) -> None: + with pytest.raises(TypeError, match="Invalid `http_client` arg"): + async with httpx.AsyncClient() as http_client: + OpenAI( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + http_client=cast(Any, http_client), + ) + def test_default_headers_option(self) -> None: client = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} @@ -246,8 +335,9 @@ def test_validate_headers(self) -> None: request = client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("Authorization") == f"Bearer {api_key}" - with pytest.raises(Exception): - client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with pytest.raises(OpenAIError): + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 def test_default_query_option(self) -> None: @@ -262,11 +352,11 @@ def test_default_query_option(self) -> None: FinalRequestOptions( method="get", url="/foo", - params={"foo": "baz", "query_param": "overriden"}, + params={"foo": "baz", "query_param": "overridden"}, ) ) url = httpx.URL(request.url) - assert dict(url.params) == {"foo": "baz", "query_param": "overriden"} + assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} def test_request_extra_json(self) -> None: request = self.client._build_request( @@ -334,7 +424,7 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], dict(request.url.params)) + params = dict(request.url.params) assert params == {"my_query_param": "Foo"} # if both `query` 
and `extra_query` are given, they are merged @@ -348,7 +438,7 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], dict(request.url.params)) + params = dict(request.url.params) assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority over `query` when keys clash @@ -362,9 +452,38 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], dict(request.url.params)) + params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, client: OpenAI) -> None: + request = client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel): @@ -422,6 +541,14 @@ class Model(BaseModel): assert isinstance(response, Model) assert response.foo == 2 + def test_base_url_setter(self) -> None: + client = OpenAI(base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True) + assert client.base_url == "https://example.com/from_init/" + + client.base_url = "https://example.com/from_setter" # type: ignore[assignment] + + assert client.base_url == "https://example.com/from_setter/" + def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = OpenAI(api_key=api_key, _strict_response_validation=True) @@ -496,14 +623,6 @@ def test_absolute_request_url(http://github.com/self,%20client:%20OpenAI) -> None: ) assert request.url == "https://myapi.com/foo" - def test_client_del(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() - - client.__del__() - - assert client.is_closed() - def test_copied_client_does_not_close_http(self) -> None: client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() @@ -511,9 +630,8 @@ def test_copied_client_does_not_close_http(self) -> None: copied = client.copy() assert copied is not client - copied.__del__() + del copied - assert not copied.is_closed() assert not client.is_closed() def test_client_context_manager(self) -> None: @@ -536,6 +654,10 @@ class Model(BaseModel): assert isinstance(exc.value.__cause__, ValidationError) + def test_client_max_retries_validation(self) -> None: + with pytest.raises(TypeError, match=r"max_retries cannot be None"): + OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None)) + @pytest.mark.respx(base_url=base_url) def test_default_stream_cls(self, respx_mock: MockRouter) -> None: class Model(BaseModel): @@ -543,8 +665,9 @@ class Model(BaseModel): 
respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.post("/foo", cast_to=Model, stream=True) - assert isinstance(response, Stream) + stream = self.client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) + assert isinstance(stream, Stream) + stream.response.close() @pytest.mark.respx(base_url=base_url) def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: @@ -581,6 +704,7 @@ class Model(BaseModel): [3, "", 0.5], [2, "", 0.5 * 2.0], [1, "", 0.5 * 4.0], + [-1100, "", 8], # test large number potentially overflowing ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) @@ -592,6 +716,198 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str calculated = client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) + + with pytest.raises(APITimeoutError): + self.client.post( + "/chat/completions", + body=cast( + object, + maybe_transform( + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + ), + CompletionCreateParamsNonStreaming, + ), + ), + cast_to=httpx.Response, + options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, + ) + + assert _get_open_connections(self.client) == 0 + + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) + + with pytest.raises(APIStatusError): + self.client.post( + "/chat/completions", + body=cast( + object, + maybe_transform( + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + ), + CompletionCreateParamsNonStreaming, + ), + ), + cast_to=httpx.Response, + options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, + ) + + assert _get_open_connections(self.client) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.parametrize("failure_mode", ["status", "exception"]) + def test_retries_taken( + self, + client: OpenAI, + failures_before_success: int, + failure_mode: Literal["status", "exception"], + respx_mock: MockRouter, + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + if failure_mode == "exception": + raise RuntimeError("oops") + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) + + assert response.retries_taken == failures_before_success + assert 
int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_omit_retry_count_header( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": Omit()}, + ) + + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_overwrite_retry_count_header( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": "42"}, + ) + + assert response.http_request.headers.get("x-stainless-retry-count") == "42" + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_retries_taken_new_response_class( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + class TestAsyncOpenAI: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) @@ -728,6 +1044,68 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + def test_copy_build_request(self) -> None: + options = FinalRequestOptions(method="get", url="/foo") + + def build_request(options: FinalRequestOptions) -> None: + client = self.client.copy() + 
client._build_request(options) + + # ensure that the machinery is warmed up before tracing starts. + build_request(options) + gc.collect() + + tracemalloc.start(1000) + + snapshot_before = tracemalloc.take_snapshot() + + ITERATIONS = 10 + for _ in range(ITERATIONS): + build_request(options) + + gc.collect() + snapshot_after = tracemalloc.take_snapshot() + + tracemalloc.stop() + + def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None: + if diff.count == 0: + # Avoid false positives by considering only leaks (i.e. allocations that persist). + return + + if diff.count % ITERATIONS != 0: + # Avoid false positives by considering only leaks that appear per iteration. + return + + for frame in diff.traceback: + if any( + frame.filename.endswith(fragment) + for fragment in [ + # to_raw_response_wrapper leaks through the @functools.wraps() decorator. + # + # removing the decorator fixes the leak for reasons we don't understand. + "openai/_legacy_response.py", + "openai/_response.py", + # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason. + "openai/_compat.py", + # Standard library leaks we don't care about. + "/logging/__init__.py", + ] + ): + return + + leaks.append(diff) + + leaks: list[tracemalloc.StatisticDiff] = [] + for diff in snapshot_after.compare_to(snapshot_before, "traceback"): + add_leak(leaks, diff) + if leaks: + for leak in leaks: + print("MEMORY LEAK:", leak) + for frame in leak.traceback: + print(frame) + raise AssertionError() + async def test_request_timeout(self) -> None: request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -779,6 +1157,16 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + def test_invalid_http_client(self) -> None: + with pytest.raises(TypeError, match="Invalid `http_client` arg"): + with httpx.Client() as http_client: + AsyncOpenAI( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + http_client=cast(Any, http_client), + ) + def test_default_headers_option(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} @@ -805,8 +1193,9 @@ def test_validate_headers(self) -> None: request = client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("Authorization") == f"Bearer {api_key}" - with pytest.raises(Exception): - client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with pytest.raises(OpenAIError): + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 def test_default_query_option(self) -> None: @@ -821,11 +1210,11 @@ def test_default_query_option(self) -> None: FinalRequestOptions( method="get", url="/foo", - params={"foo": "baz", "query_param": "overriden"}, + params={"foo": "baz", "query_param": "overridden"}, ) ) url = httpx.URL(request.url) - assert dict(url.params) == {"foo": "baz", "query_param": "overriden"} + assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} def test_request_extra_json(self) -> None: request = self.client._build_request( @@ -893,7 +1282,7 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], 
dict(request.url.params)) + params = dict(request.url.params) assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged @@ -907,7 +1296,7 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], dict(request.url.params)) + params = dict(request.url.params) assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority over `query` when keys clash @@ -921,9 +1310,38 @@ def test_request_extra_query(self) -> None: ), ), ) - params = cast(Dict[str, str], dict(request.url.params)) + params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) async def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel): @@ -981,6 +1399,16 @@ class Model(BaseModel): assert isinstance(response, Model) assert response.foo == 2 + def test_base_url_setter(self) -> None: + client = AsyncOpenAI( + base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True + ) + assert client.base_url == "https://example.com/from_init/" + + client.base_url = "https://example.com/from_setter" # type: ignore[assignment] + + assert client.base_url == "https://example.com/from_setter/" + def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = AsyncOpenAI(api_key=api_key, _strict_response_validation=True) @@ -1061,15 +1489,6 @@ def test_absolute_request_url(http://github.com/self,%20client:%20AsyncOpenAI) -> None: ) assert request.url == "https://myapi.com/foo" - async def test_client_del(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() - - client.__del__() - - await asyncio.sleep(0.2) - assert client.is_closed() - async def test_copied_client_does_not_close_http(self) -> None: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() @@ -1077,10 +1496,9 @@ async def test_copied_client_does_not_close_http(self) -> None: copied = client.copy() assert copied is not client - copied.__del__() + del copied await asyncio.sleep(0.2) - assert not copied.is_closed() assert not client.is_closed() async def test_client_context_manager(self) -> None: @@ -1104,6 +1522,12 @@ class Model(BaseModel): assert isinstance(exc.value.__cause__, ValidationError) + async def test_client_max_retries_validation(self) -> None: + with pytest.raises(TypeError, match=r"max_retries cannot be None"): + AsyncOpenAI( + base_url=base_url, api_key=api_key, 
_strict_response_validation=True, max_retries=cast(Any, None) + ) + @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio async def test_default_stream_cls(self, respx_mock: MockRouter) -> None: @@ -1112,8 +1536,9 @@ class Model(BaseModel): respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.post("/foo", cast_to=Model, stream=True) - assert isinstance(response, AsyncStream) + stream = await self.client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) + assert isinstance(stream, AsyncStream) + await stream.response.aclose() @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio @@ -1151,6 +1576,7 @@ class Model(BaseModel): [3, "", 0.5], [2, "", 0.5 * 2.0], [1, "", 0.5 * 4.0], + [-1100, "", 8], # test large number potentially overflowing ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) @@ -1162,3 +1588,244 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] + + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) + + with pytest.raises(APITimeoutError): + await self.client.post( + "/chat/completions", + body=cast( + object, + maybe_transform( + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + ), + CompletionCreateParamsNonStreaming, + ), + ), + cast_to=httpx.Response, + options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, + ) + + assert _get_open_connections(self.client) == 0 + + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) + + with pytest.raises(APIStatusError): + await self.client.post( + "/chat/completions", + body=cast( + object, + maybe_transform( + dict( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + ), + CompletionCreateParamsNonStreaming, + ), + ), + cast_to=httpx.Response, + options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, + ) + + assert _get_open_connections(self.client) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.asyncio + @pytest.mark.parametrize("failure_mode", ["status", "exception"]) + async def test_retries_taken( + self, + async_client: AsyncOpenAI, + failures_before_success: int, + failure_mode: Literal["status", "exception"], + respx_mock: MockRouter, + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + if failure_mode == "exception": + raise RuntimeError("oops") + return httpx.Response(500) + 
return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) + + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.asyncio + async def test_omit_retry_count_header( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": Omit()}, + ) + + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.asyncio + async def test_overwrite_retry_count_header( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": "42"}, + ) + + assert response.http_request.headers.get("x-stainless-retry-count") == "42" + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.asyncio + async def test_retries_taken_new_response_class( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + async with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + def 
test_get_platform(self) -> None: + # A previous implementation of asyncify could leave threads unterminated when + # used with nest_asyncio. + # + # Since nest_asyncio.apply() is global and cannot be un-applied, this + # test is run in a separate process to avoid affecting other tests. + test_code = dedent(""" + import asyncio + import nest_asyncio + import threading + + from openai._utils import asyncify + from openai._base_client import get_platform + + async def test_main() -> None: + result = await asyncify(get_platform)() + print(result) + for thread in threading.enumerate(): + print(thread.name) + + nest_asyncio.apply() + asyncio.run(test_main()) + """) + with subprocess.Popen( + [sys.executable, "-c", test_code], + text=True, + ) as process: + timeout = 10 # seconds + + start_time = time.monotonic() + while True: + return_code = process.poll() + if return_code is not None: + if return_code != 0: + raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") + + # success + break + + if time.monotonic() - start_time > timeout: + process.kill() + raise AssertionError("calling get_platform using asyncify resulted in a hung process") + + time.sleep(0.1) diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py index 8cf65ce94e..86a2adb1a2 100644 --- a/tests/test_deepcopy.py +++ b/tests/test_deepcopy.py @@ -41,8 +41,7 @@ def test_nested_list() -> None: assert_different_identities(obj1[1], obj2[1]) -class MyObject: - ... +class MyObject: ... def test_ignores_other_types() -> None: diff --git a/tests/test_legacy_response.py b/tests/test_legacy_response.py new file mode 100644 index 0000000000..9da1a80659 --- /dev/null +++ b/tests/test_legacy_response.py @@ -0,0 +1,153 @@ +import json +from typing import Any, Union, cast +from typing_extensions import Annotated + +import httpx +import pytest +import pydantic + +from openai import OpenAI, BaseModel +from openai._streaming import Stream +from openai._base_client import FinalRequestOptions +from openai._legacy_response import LegacyAPIResponse + +from .utils import rich_print_str + + +class PydanticModel(pydantic.BaseModel): ... + + +def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`", + ): + response.parse(to=PydanticModel) + + +@pytest.mark.parametrize( + "content, expected", + [ + ("false", False), + ("true", True), + ("False", False), + ("True", True), + ("TrUe", True), + ("FalSe", False), + ], +) +def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=content), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + result = response.parse(to=bool) + assert result is expected + + +def test_response_parse_custom_stream(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = response.parse(to=Stream[int]) + assert stream._cast_to == int + + +class CustomModel(BaseModel): + foo: str + bar: int + + +def test_response_parse_custom_model(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +def test_response_basemodel_request_id(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response( + 200, + headers={"x-request-id": "my-req-id"}, + content=json.dumps({"foo": "hello!", "bar": 2}), + ), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj._request_id == "my-req-id" + assert obj.foo == "hello!" + assert obj.bar == 2 + assert obj.to_dict() == {"foo": "hello!", "bar": 2} + assert "_request_id" not in rich_print_str(obj) + assert "__exclude_fields__" not in rich_print_str(obj) + + +def test_response_parse_annotated_type(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" 
+ assert obj.bar == 2 + + +class OtherModel(pydantic.BaseModel): + a: str + + +@pytest.mark.parametrize("client", [False], indirect=True) # loose validation +def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel])) + assert isinstance(obj, str) + assert obj == "foo" diff --git a/tests/test_models.py b/tests/test_models.py index 713bd2cb1b..440e17a08c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,14 +1,15 @@ import json from typing import Any, Dict, List, Union, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal +from typing_extensions import Literal, Annotated, TypeAliasType import pytest import pydantic from pydantic import Field +from openai._utils import PropertyInfo from openai._compat import PYDANTIC_V2, parse_obj, model_dump, model_json -from openai._models import BaseModel +from openai._models import BaseModel, construct_type class BasicModel(BaseModel): @@ -30,7 +31,7 @@ class NestedModel(BaseModel): # mismatched types m = NestedModel.construct(nested="hello!") - assert m.nested == "hello!" + assert cast(Any, m.nested) == "hello!" def test_optional_nested_model() -> None: @@ -47,7 +48,7 @@ class NestedModel(BaseModel): # mismatched types m3 = NestedModel.construct(nested={"foo"}) assert isinstance(cast(Any, m3.nested), set) - assert m3.nested == {"foo"} + assert cast(Any, m3.nested) == {"foo"} def test_list_nested_model() -> None: @@ -244,7 +245,7 @@ class Model(BaseModel): assert m.foo is True m = Model.construct(foo="CARD_HOLDER") - assert m.foo is "CARD_HOLDER" + assert m.foo == "CARD_HOLDER" m = Model.construct(foo={"bar": False}) assert isinstance(m.foo, Submodel1) @@ -322,7 +323,7 @@ class Model(BaseModel): assert len(m.items) == 2 assert isinstance(m.items[0], Submodel1) assert m.items[0].level == -1 - assert m.items[1] == 156 + assert cast(Any, m.items[1]) == 156 def test_union_of_lists() -> None: @@ -354,7 +355,7 @@ class Model(BaseModel): assert len(m.items) == 2 assert isinstance(m.items[0], SubModel1) assert m.items[0].level == -1 - assert m.items[1] == 156 + assert cast(Any, m.items[1]) == 156 def test_dict_of_union() -> None: @@ -491,15 +492,50 @@ class Model(BaseModel): resource_id: Optional[str] = None m = Model.construct() + assert m.resource_id is None assert "resource_id" not in m.model_fields_set m = Model.construct(resource_id=None) + assert m.resource_id is None assert "resource_id" in m.model_fields_set m = Model.construct(resource_id="foo") + assert m.resource_id == "foo" assert "resource_id" in m.model_fields_set +def test_to_dict() -> None: + class Model(BaseModel): + foo: Optional[str] = Field(alias="FOO", default=None) + + m = Model(FOO="hello") + assert m.to_dict() == {"FOO": "hello"} + assert m.to_dict(use_api_names=False) == {"foo": "hello"} + + m2 = Model() + assert m2.to_dict() == {} + assert m2.to_dict(exclude_unset=False) == {"FOO": None} + assert m2.to_dict(exclude_unset=False, exclude_none=True) == {} + assert m2.to_dict(exclude_unset=False, exclude_defaults=True) == {} + + m3 = Model(FOO=None) + assert m3.to_dict() == {"FOO": None} + assert m3.to_dict(exclude_none=True) == {} + assert m3.to_dict(exclude_defaults=True) == {} + + 
class Model2(BaseModel): + created_at: datetime + + time_str = "2024-03-21T11:39:01.275859" + m4 = Model2.construct(created_at=time_str) + assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)} + assert m4.to_dict(mode="json") == {"created_at": time_str} + + if not PYDANTIC_V2: + with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"): + m.to_dict(warnings=False) + + def test_forwards_compat_model_dump_method() -> None: class Model(BaseModel): foo: Optional[str] = Field(alias="FOO", default=None) @@ -521,9 +557,6 @@ class Model(BaseModel): assert m3.model_dump(exclude_none=True) == {} if not PYDANTIC_V2: - with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"): - m.model_dump(mode="json") - with pytest.raises(ValueError, match="round_trip is only supported in Pydantic v2"): m.model_dump(round_trip=True) @@ -531,6 +564,42 @@ class Model(BaseModel): m.model_dump(warnings=False) +def test_compat_method_no_error_for_warnings() -> None: + class Model(BaseModel): + foo: Optional[str] + + m = Model(foo="hello") + assert isinstance(model_dump(m, warnings=False), dict) + + +def test_to_json() -> None: + class Model(BaseModel): + foo: Optional[str] = Field(alias="FOO", default=None) + + m = Model(FOO="hello") + assert json.loads(m.to_json()) == {"FOO": "hello"} + assert json.loads(m.to_json(use_api_names=False)) == {"foo": "hello"} + + if PYDANTIC_V2: + assert m.to_json(indent=None) == '{"FOO":"hello"}' + else: + assert m.to_json(indent=None) == '{"FOO": "hello"}' + + m2 = Model() + assert json.loads(m2.to_json()) == {} + assert json.loads(m2.to_json(exclude_unset=False)) == {"FOO": None} + assert json.loads(m2.to_json(exclude_unset=False, exclude_none=True)) == {} + assert json.loads(m2.to_json(exclude_unset=False, exclude_defaults=True)) == {} + + m3 = Model(FOO=None) + assert json.loads(m3.to_json()) == {"FOO": None} + assert json.loads(m3.to_json(exclude_none=True)) == {} + + if not PYDANTIC_V2: + with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"): + m.to_json(warnings=False) + + def test_forwards_compat_model_dump_json_method() -> None: class Model(BaseModel): foo: Optional[str] = Field(alias="FOO", default=None) @@ -571,3 +640,252 @@ class OurModel(BaseModel): foo: Optional[str] = None takes_pydantic(OurModel()) + + +def test_annotated_types() -> None: + class Model(BaseModel): + value: str + + m = construct_type( + value={"value": "foo"}, + type_=cast(Any, Annotated[Model, "random metadata"]), + ) + assert isinstance(m, Model) + assert m.value == "foo" + + +def test_discriminated_unions_invalid_data() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "a", "data": 100}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, A) + assert m.type == "a" + if PYDANTIC_V2: + assert m.data == 100 # type: ignore[comparison-overlap] + else: + # pydantic v1 automatically converts inputs to strings + # if the expected type is a str + assert m.data == "100" + + +def test_discriminated_unions_unknown_variant() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class 
B(BaseModel): + type: Literal["b"] + + data: int + + m = construct_type( + value={"type": "c", "data": None, "new_thing": "bar"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + + # just chooses the first variant + assert isinstance(m, A) + assert m.type == "c" # type: ignore[comparison-overlap] + assert m.data == None # type: ignore[unreachable] + assert m.new_thing == "bar" + + +def test_discriminated_unions_invalid_data_nested_unions() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + class C(BaseModel): + type: Literal["c"] + + data: bool + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[Union[A, B], C], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "c", "data": "foo"}, + type_=cast(Any, Annotated[Union[Union[A, B], C], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, C) + assert m.type == "c" + assert m.data == "foo" # type: ignore[comparison-overlap] + + +def test_discriminated_unions_with_aliases_invalid_data() -> None: + class A(BaseModel): + foo_type: Literal["a"] = Field(alias="type") + + data: str + + class B(BaseModel): + foo_type: Literal["b"] = Field(alias="type") + + data: int + + m = construct_type( + value={"type": "b", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="foo_type")]), + ) + assert isinstance(m, B) + assert m.foo_type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + m = construct_type( + value={"type": "a", "data": 100}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="foo_type")]), + ) + assert isinstance(m, A) + assert m.foo_type == "a" + if PYDANTIC_V2: + assert m.data == 100 # type: ignore[comparison-overlap] + else: + # pydantic v1 automatically converts inputs to strings + # if the expected type is a str + assert m.data == "100" + + +def test_discriminated_unions_overlapping_discriminators_invalid_data() -> None: + class A(BaseModel): + type: Literal["a"] + + data: bool + + class B(BaseModel): + type: Literal["a"] + + data: int + + m = construct_type( + value={"type": "a", "data": "foo"}, + type_=cast(Any, Annotated[Union[A, B], PropertyInfo(discriminator="type")]), + ) + assert isinstance(m, B) + assert m.type == "a" + assert m.data == "foo" # type: ignore[comparison-overlap] + + +def test_discriminated_unions_invalid_data_uses_cache() -> None: + class A(BaseModel): + type: Literal["a"] + + data: str + + class B(BaseModel): + type: Literal["b"] + + data: int + + UnionType = cast(Any, Union[A, B]) + + assert not hasattr(UnionType, "__discriminator__") + + m = construct_type( + value={"type": "b", "data": "foo"}, type_=cast(Any, Annotated[UnionType, PropertyInfo(discriminator="type")]) + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + discriminator = UnionType.__discriminator__ + assert discriminator is not None + + m = construct_type( + value={"type": "b", "data": "foo"}, type_=cast(Any, Annotated[UnionType, PropertyInfo(discriminator="type")]) + ) + assert isinstance(m, B) + assert m.type == "b" + assert m.data == "foo" # type: ignore[comparison-overlap] + + # if the discriminator details object stays the same between invocations then + # we hit the cache + assert UnionType.__discriminator__ 
is discriminator + + +@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1") +def test_type_alias_type() -> None: + Alias = TypeAliasType("Alias", str) # pyright: ignore + + class Model(BaseModel): + alias: Alias + union: Union[int, Alias] + + m = construct_type(value={"alias": "foo", "union": "bar"}, type_=Model) + assert isinstance(m, Model) + assert isinstance(m.alias, str) + assert m.alias == "foo" + assert isinstance(m.union, str) + assert m.union == "bar" + + +@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1") +def test_field_named_cls() -> None: + class Model(BaseModel): + cls: str + + m = construct_type(value={"cls": "foo"}, type_=Model) + assert isinstance(m, Model) + assert isinstance(m.cls, str) + + +def test_discriminated_union_case() -> None: + class A(BaseModel): + type: Literal["a"] + + data: bool + + class B(BaseModel): + type: Literal["b"] + + data: List[Union[A, object]] + + class ModelA(BaseModel): + type: Literal["modelA"] + + data: int + + class ModelB(BaseModel): + type: Literal["modelB"] + + required: str + + data: Union[A, B] + + # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required` + m = construct_type( + value={"type": "modelB", "data": {"type": "a", "data": True}}, + type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]), + ) + + assert isinstance(m, ModelB) diff --git a/tests/test_module_client.py b/tests/test_module_client.py index 50b7369e19..6bab33a1d7 100644 --- a/tests/test_module_client.py +++ b/tests/test_module_client.py @@ -1,4 +1,4 @@ -# File generated from our OpenAPI spec by Stainless. +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from __future__ import annotations @@ -16,6 +16,7 @@ def reset_state() -> None: openai._reset_client() openai.api_key = None or "My API Key" openai.organization = None + openai.project = None openai.base_url = None openai.timeout = DEFAULT_TIMEOUT openai.max_retries = DEFAULT_MAX_RETRIES @@ -109,6 +110,7 @@ def fresh_env() -> Iterator[None]: _os.environ.clear() yield finally: + _os.environ.clear() _os.environ.update(old) @@ -129,7 +131,7 @@ def test_azure_api_key_env_without_api_version() -> None: ValueError, match=r"Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable", ): - openai.completions._client + openai.completions._client # noqa: B018 def test_azure_api_key_and_version_env() -> None: @@ -142,7 +144,7 @@ def test_azure_api_key_and_version_env() -> None: ValueError, match=r"Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable", ): - openai.completions._client + openai.completions._client # noqa: B018 def test_azure_api_key_version_and_endpoint_env() -> None: @@ -152,7 +154,7 @@ def test_azure_api_key_version_and_endpoint_env() -> None: _os.environ["OPENAI_API_VERSION"] = "example-version" _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example/" - openai.completions._client + openai.completions._client # noqa: B018 assert openai.api_type == "azure" diff --git a/tests/test_required_args.py b/tests/test_required_args.py index 1de017db24..5d1a5224ff 100644 --- a/tests/test_required_args.py +++ b/tests/test_required_args.py @@ -43,7 +43,7 @@ def foo(*, a: str | None = None) -> str | None: def test_multiple_params() -> None: @required_args(["a", "b", "c"]) def foo(a: str = "", *, b: str = "", c: str = "") -> str | None: - return a + " " + b + " " + c + return f"{a} {b} {c}" assert foo(a="a", b="b", c="c") == "a b c" diff --git a/tests/test_response.py b/tests/test_response.py new file mode 100644 index 0000000000..43f24c150d --- /dev/null +++ b/tests/test_response.py @@ -0,0 +1,324 @@ +import json +from typing import Any, List, Union, cast +from typing_extensions import Annotated + +import httpx +import pytest +import pydantic + +from openai import OpenAI, BaseModel, AsyncOpenAI +from openai._response import ( + APIResponse, + BaseAPIResponse, + AsyncAPIResponse, + BinaryAPIResponse, + AsyncBinaryAPIResponse, + extract_response_type, +) +from openai._streaming import Stream +from openai._base_client import FinalRequestOptions + +from .utils import rich_print_str + + +class ConcreteBaseAPIResponse(APIResponse[bytes]): ... + + +class ConcreteAPIResponse(APIResponse[List[str]]): ... + + +class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]): ... 
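+# The concrete subclasses above pin the generic response type argument, so the tests below can
+# verify that extract_response_type() recovers it from a subclass as well as from a directly
+# parameterized class.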
+ + +def test_extract_response_type_direct_classes() -> None: + assert extract_response_type(BaseAPIResponse[str]) == str + assert extract_response_type(APIResponse[str]) == str + assert extract_response_type(AsyncAPIResponse[str]) == str + + +def test_extract_response_type_direct_class_missing_type_arg() -> None: + with pytest.raises( + RuntimeError, + match="Expected type to have a type argument at index 0 but it did not", + ): + extract_response_type(AsyncAPIResponse) + + +def test_extract_response_type_concrete_subclasses() -> None: + assert extract_response_type(ConcreteBaseAPIResponse) == bytes + assert extract_response_type(ConcreteAPIResponse) == List[str] + assert extract_response_type(ConcreteAsyncAPIResponse) == httpx.Response + + +def test_extract_response_type_binary_response() -> None: + assert extract_response_type(BinaryAPIResponse) == bytes + assert extract_response_type(AsyncBinaryAPIResponse) == bytes + + +class PydanticModel(pydantic.BaseModel): ... + + +def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`", + ): + response.parse(to=PydanticModel) + + +@pytest.mark.asyncio +async def test_async_response_parse_mismatched_basemodel(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + with pytest.raises( + TypeError, + match="Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`", + ): + await response.parse(to=PydanticModel) + + +def test_response_parse_custom_stream(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=b"foo"), + client=client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = response.parse(to=Stream[int]) + assert stream._cast_to == int + + +@pytest.mark.asyncio +async def test_async_response_parse_custom_stream(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=b"foo"), + client=async_client, + stream=True, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + stream = await response.parse(to=Stream[int]) + assert stream._cast_to == int + + +class CustomModel(BaseModel): + foo: str + bar: int + + +def test_response_parse_custom_model(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj.foo == "hello!" 
+ assert obj.bar == 2 + + +@pytest.mark.asyncio +async def test_async_response_parse_custom_model(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse(to=CustomModel) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +def test_response_basemodel_request_id(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response( + 200, + headers={"x-request-id": "my-req-id"}, + content=json.dumps({"foo": "hello!", "bar": 2}), + ), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=CustomModel) + assert obj._request_id == "my-req-id" + assert obj.foo == "hello!" + assert obj.bar == 2 + assert obj.to_dict() == {"foo": "hello!", "bar": 2} + assert "_request_id" not in rich_print_str(obj) + assert "__exclude_fields__" not in rich_print_str(obj) + + +@pytest.mark.asyncio +async def test_async_response_basemodel_request_id(client: OpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response( + 200, + headers={"x-request-id": "my-req-id"}, + content=json.dumps({"foo": "hello!", "bar": 2}), + ), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse(to=CustomModel) + assert obj._request_id == "my-req-id" + assert obj.foo == "hello!" + assert obj.bar == 2 + assert obj.to_dict() == {"foo": "hello!", "bar": 2} + + +def test_response_parse_annotated_type(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" + assert obj.bar == 2 + + +async def test_async_response_parse_annotated_type(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse( + to=cast("type[CustomModel]", Annotated[CustomModel, "random metadata"]), + ) + assert obj.foo == "hello!" 
+ assert obj.bar == 2 + + +@pytest.mark.parametrize( + "content, expected", + [ + ("false", False), + ("true", True), + ("False", False), + ("True", True), + ("TrUe", True), + ("FalSe", False), + ], +) +def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None: + response = APIResponse( + raw=httpx.Response(200, content=content), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + result = response.parse(to=bool) + assert result is expected + + +@pytest.mark.parametrize( + "content, expected", + [ + ("false", False), + ("true", True), + ("False", False), + ("True", True), + ("TrUe", True), + ("FalSe", False), + ], +) +async def test_async_response_parse_bool(client: AsyncOpenAI, content: str, expected: bool) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=content), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + result = await response.parse(to=bool) + assert result is expected + + +class OtherModel(BaseModel): + a: str + + +@pytest.mark.parametrize("client", [False], indirect=True) # loose validation +def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None: + response = APIResponse( + raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel])) + assert isinstance(obj, str) + assert obj == "foo" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("async_client", [False], indirect=True) # loose validation +async def test_async_response_parse_expect_model_union_non_json_content(async_client: AsyncOpenAI) -> None: + response = AsyncAPIResponse( + raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}), + client=async_client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + obj = await response.parse(to=cast(Any, Union[CustomModel, OtherModel])) + assert isinstance(obj, str) + assert obj == "foo" diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 75e4ca2699..04f8e51abd 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -1,104 +1,248 @@ +from __future__ import annotations + from typing import Iterator, AsyncIterator +import httpx import pytest -from openai._streaming import SSEDecoder +from openai import OpenAI, AsyncOpenAI +from openai._streaming import Stream, AsyncStream, ServerSentEvent @pytest.mark.asyncio -async def test_basic_async() -> None: - async def body() -> AsyncIterator[str]: - yield "event: completion" - yield 'data: {"foo":true}' - yield "" - - async for sse in SSEDecoder().aiter(body()): - assert sse.event == "completion" - assert sse.json() == {"foo": True} +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_basic(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: completion\n" + yield b'data: {"foo":true}\n' + yield b"\n" + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) -def test_basic() -> None: - def body() -> Iterator[str]: - yield "event: completion" - yield 'data: {"foo":true}' - yield "" - 
- it = SSEDecoder().iter(body()) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.json() == {"foo": True} - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) -def test_data_missing_event() -> None: - def body() -> Iterator[str]: - yield 'data: {"foo":true}' - yield "" +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_data_missing_event(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"foo":true}\n' + yield b"\n" - it = SSEDecoder().iter(body()) - sse = next(it) + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) assert sse.event is None assert sse.json() == {"foo": True} - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_event_missing_data(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"\n" -def test_event_missing_data() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield "" + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) - it = SSEDecoder().iter(body()) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "ping" assert sse.data == "" - with pytest.raises(StopIteration): - next(it) + await assert_empty_iter(iterator) -def test_multiple_events() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield "" - yield "event: completion" - yield "" +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_events(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"\n" + yield b"event: completion\n" + yield b"\n" - it = SSEDecoder().iter(body()) + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "ping" assert sse.data == "" - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.data == "" - with pytest.raises(StopIteration): - next(it) - - -def test_multiple_events_with_data() -> None: - def body() -> Iterator[str]: - yield "event: ping" - yield 'data: {"foo":true}' - yield "" - yield "event: completion" - yield 'data: {"bar":false}' - yield "" + await assert_empty_iter(iterator) - it = SSEDecoder().iter(body()) - sse = next(it) +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_events_with_data(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b'data: {"foo":true}\n' + yield b"\n" + yield b"event: completion\n" + yield b'data: {"bar":false}\n' + yield b"\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) assert sse.event == "ping" assert sse.json() == {"foo": True} - sse = next(it) + sse = await iter_next(iterator) assert sse.event == "completion" assert sse.json() == {"bar": False} - with pytest.raises(StopIteration): - next(it) + await 
assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_data_lines_with_empty_line(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"data: {\n" + yield b'data: "foo":\n' + yield b"data: \n" + yield b"data:\n" + yield b"data: true}\n" + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": True} + assert sse.data == '{\n"foo":\n\n\ntrue}' + + await assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_data_json_escaped_double_new_line(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b'data: {"foo": "my long\\n\\ncontent"}' + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": "my long\n\ncontent"} + + await assert_empty_iter(iterator) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multiple_data_lines(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: + def body() -> Iterator[bytes]: + yield b"event: ping\n" + yield b"data: {\n" + yield b'data: "foo":\n' + yield b"data: true}\n" + yield b"\n\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event == "ping" + assert sse.json() == {"foo": True} + + await assert_empty_iter(iterator) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_special_new_line_character( + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"content":" culpa"}\n' + yield b"\n" + yield b'data: {"content":" \xe2\x80\xa8"}\n' + yield b"\n" + yield b'data: {"content":"foo"}\n' + yield b"\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": " culpa"} + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": " 
"} + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": "foo"} + + await assert_empty_iter(iterator) + + +@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) +async def test_multi_byte_character_multiple_chunks( + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> None: + def body() -> Iterator[bytes]: + yield b'data: {"content":"' + # bytes taken from the string 'известни' and arbitrarily split + # so that some multi-byte characters span multiple chunks + yield b"\xd0" + yield b"\xb8\xd0\xb7\xd0" + yield b"\xb2\xd0\xb5\xd1\x81\xd1\x82\xd0\xbd\xd0\xb8" + yield b'"}\n' + yield b"\n" + + iterator = make_event_iterator(content=body(), sync=sync, client=client, async_client=async_client) + + sse = await iter_next(iterator) + assert sse.event is None + assert sse.json() == {"content": "известни"} + + +async def to_aiter(iter: Iterator[bytes]) -> AsyncIterator[bytes]: + for chunk in iter: + yield chunk + + +async def iter_next(iter: Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]) -> ServerSentEvent: + if isinstance(iter, AsyncIterator): + return await iter.__anext__() + + return next(iter) + + +async def assert_empty_iter(iter: Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]) -> None: + with pytest.raises((StopAsyncIteration, RuntimeError)): + await iter_next(iter) + + +def make_event_iterator( + content: Iterator[bytes], + *, + sync: bool, + client: OpenAI, + async_client: AsyncOpenAI, +) -> Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]: + if sync: + return Stream(cast_to=object, client=client, response=httpx.Response(200, content=content))._iter_events() + + return AsyncStream( + cast_to=object, client=async_client, response=httpx.Response(200, content=to_aiter(content)) + )._iter_events() diff --git a/tests/test_transform.py b/tests/test_transform.py index 5e15385f4d..965f65f74f 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,22 +1,50 @@ from __future__ import annotations -from typing import Any, List, Union, Optional +import io +import pathlib +from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast from datetime import date, datetime from typing_extensions import Required, Annotated, TypedDict import pytest -from openai._utils import PropertyInfo, transform, parse_datetime +from openai._types import NOT_GIVEN, Base64FileInput +from openai._utils import ( + PropertyInfo, + transform as _transform, + parse_datetime, + async_transform as _async_transform, +) from openai._compat import PYDANTIC_V2 from openai._models import BaseModel +_T = TypeVar("_T") + +SAMPLE_FILE_PATH = pathlib.Path(__file__).parent.joinpath("sample_file.txt") + + +async def transform( + data: _T, + expected_type: object, + use_async: bool, +) -> _T: + if use_async: + return await _async_transform(data, expected_type=expected_type) + + return _transform(data, expected_type=expected_type) + + +parametrize = pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"]) + class Foo1(TypedDict): foo_bar: Annotated[str, PropertyInfo(alias="fooBar")] -def test_top_level_alias() -> None: - assert transform({"foo_bar": "hello"}, expected_type=Foo1) == {"fooBar": "hello"} +@parametrize +@pytest.mark.asyncio +async def test_top_level_alias(use_async: bool) -> None: + assert await transform({"foo_bar": "hello"}, expected_type=Foo1, use_async=use_async) == {"fooBar": "hello"} class Foo2(TypedDict): @@ -32,9 +60,11 @@ class Baz2(TypedDict): my_baz: Annotated[str, 
PropertyInfo(alias="myBaz")] -def test_recursive_typeddict() -> None: - assert transform({"bar": {"this_thing": 1}}, Foo2) == {"bar": {"this__thing": 1}} - assert transform({"bar": {"baz": {"my_baz": "foo"}}}, Foo2) == {"bar": {"Baz": {"myBaz": "foo"}}} +@parametrize +@pytest.mark.asyncio +async def test_recursive_typeddict(use_async: bool) -> None: + assert await transform({"bar": {"this_thing": 1}}, Foo2, use_async) == {"bar": {"this__thing": 1}} + assert await transform({"bar": {"baz": {"my_baz": "foo"}}}, Foo2, use_async) == {"bar": {"Baz": {"myBaz": "foo"}}} class Foo3(TypedDict): @@ -45,8 +75,10 @@ class Bar3(TypedDict): my_field: Annotated[str, PropertyInfo(alias="myField")] -def test_list_of_typeddict() -> None: - result = transform({"things": [{"my_field": "foo"}, {"my_field": "foo2"}]}, expected_type=Foo3) +@parametrize +@pytest.mark.asyncio +async def test_list_of_typeddict(use_async: bool) -> None: + result = await transform({"things": [{"my_field": "foo"}, {"my_field": "foo2"}]}, Foo3, use_async) assert result == {"things": [{"myField": "foo"}, {"myField": "foo2"}]} @@ -62,10 +94,14 @@ class Baz4(TypedDict): foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] -def test_union_of_typeddict() -> None: - assert transform({"foo": {"foo_bar": "bar"}}, Foo4) == {"foo": {"fooBar": "bar"}} - assert transform({"foo": {"foo_baz": "baz"}}, Foo4) == {"foo": {"fooBaz": "baz"}} - assert transform({"foo": {"foo_baz": "baz", "foo_bar": "bar"}}, Foo4) == {"foo": {"fooBaz": "baz", "fooBar": "bar"}} +@parametrize +@pytest.mark.asyncio +async def test_union_of_typeddict(use_async: bool) -> None: + assert await transform({"foo": {"foo_bar": "bar"}}, Foo4, use_async) == {"foo": {"fooBar": "bar"}} + assert await transform({"foo": {"foo_baz": "baz"}}, Foo4, use_async) == {"foo": {"fooBaz": "baz"}} + assert await transform({"foo": {"foo_baz": "baz", "foo_bar": "bar"}}, Foo4, use_async) == { + "foo": {"fooBaz": "baz", "fooBar": "bar"} + } class Foo5(TypedDict): @@ -80,9 +116,11 @@ class Baz5(TypedDict): foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] -def test_union_of_list() -> None: - assert transform({"foo": {"foo_bar": "bar"}}, Foo5) == {"FOO": {"fooBar": "bar"}} - assert transform( +@parametrize +@pytest.mark.asyncio +async def test_union_of_list(use_async: bool) -> None: + assert await transform({"foo": {"foo_bar": "bar"}}, Foo5, use_async) == {"FOO": {"fooBar": "bar"}} + assert await transform( { "foo": [ {"foo_baz": "baz"}, @@ -90,6 +128,7 @@ def test_union_of_list() -> None: ] }, Foo5, + use_async, ) == {"FOO": [{"fooBaz": "baz"}, {"fooBaz": "baz"}]} @@ -97,8 +136,10 @@ class Foo6(TypedDict): bar: Annotated[str, PropertyInfo(alias="Bar")] -def test_includes_unknown_keys() -> None: - assert transform({"bar": "bar", "baz_": {"FOO": 1}}, Foo6) == { +@parametrize +@pytest.mark.asyncio +async def test_includes_unknown_keys(use_async: bool) -> None: + assert await transform({"bar": "bar", "baz_": {"FOO": 1}}, Foo6, use_async) == { "Bar": "bar", "baz_": {"FOO": 1}, } @@ -113,9 +154,11 @@ class Bar7(TypedDict): foo: str -def test_ignores_invalid_input() -> None: - assert transform({"bar": ""}, Foo7) == {"bAr": ""} - assert transform({"foo": ""}, Foo7) == {"foo": ""} +@parametrize +@pytest.mark.asyncio +async def test_ignores_invalid_input(use_async: bool) -> None: + assert await transform({"bar": ""}, Foo7, use_async) == {"bAr": ""} + assert await transform({"foo": ""}, Foo7, use_async) == {"foo": ""} class DatetimeDict(TypedDict, total=False): @@ -134,52 +177,81 @@ class DateDict(TypedDict, 
total=False): foo: Annotated[date, PropertyInfo(format="iso8601")] -def test_iso8601_format() -> None: +class DatetimeModel(BaseModel): + foo: datetime + + +class DateModel(BaseModel): + foo: Optional[date] + + +@parametrize +@pytest.mark.asyncio +async def test_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"foo": dt}, DatetimeDict) == {"foo": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + tz = "Z" if PYDANTIC_V2 else "+00:00" + assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692" + tz} # type: ignore[comparison-overlap] dt = dt.replace(tzinfo=None) - assert transform({"foo": dt}, DatetimeDict) == {"foo": "2023-02-23T14:16:36.337692"} # type: ignore[comparison-overlap] + assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692"} # type: ignore[comparison-overlap] + assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692"} # type: ignore[comparison-overlap] - assert transform({"foo": None}, DateDict) == {"foo": None} # type: ignore[comparison-overlap] - assert transform({"foo": date.fromisoformat("2023-02-23")}, DateDict) == {"foo": "2023-02-23"} # type: ignore[comparison-overlap] + assert await transform({"foo": None}, DateDict, use_async) == {"foo": None} # type: ignore[comparison-overlap] + assert await transform(DateModel(foo=None), Any, use_async) == {"foo": None} # type: ignore + assert await transform({"foo": date.fromisoformat("2023-02-23")}, DateDict, use_async) == {"foo": "2023-02-23"} # type: ignore[comparison-overlap] + assert await transform(DateModel(foo=date.fromisoformat("2023-02-23")), DateDict, use_async) == { + "foo": "2023-02-23" + } # type: ignore[comparison-overlap] -def test_optional_iso8601_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_optional_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"bar": dt}, DatetimeDict) == {"bar": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform({"bar": dt}, DatetimeDict, use_async) == {"bar": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] - assert transform({"bar": None}, DatetimeDict) == {"bar": None} + assert await transform({"bar": None}, DatetimeDict, use_async) == {"bar": None} -def test_required_iso8601_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_required_iso8601_format(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"required": dt}, DatetimeDict) == {"required": "2023-02-23T14:16:36.337692+00:00"} # type: ignore[comparison-overlap] + assert await transform({"required": dt}, DatetimeDict, use_async) == { + "required": "2023-02-23T14:16:36.337692+00:00" + } # type: ignore[comparison-overlap] - assert transform({"required": None}, DatetimeDict) == {"required": None} + assert await transform({"required": None}, DatetimeDict, use_async) == {"required": None} -def test_union_datetime() -> None: +@parametrize +@pytest.mark.asyncio +async def test_union_datetime(use_async: bool) -> None: dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") - assert transform({"union": dt}, DatetimeDict) == { 
# type: ignore[comparison-overlap] + assert await transform({"union": dt}, DatetimeDict, use_async) == { # type: ignore[comparison-overlap] "union": "2023-02-23T14:16:36.337692+00:00" } - assert transform({"union": "foo"}, DatetimeDict) == {"union": "foo"} + assert await transform({"union": "foo"}, DatetimeDict, use_async) == {"union": "foo"} -def test_nested_list_iso6801_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_nested_list_iso6801_format(use_async: bool) -> None: dt1 = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00") dt2 = parse_datetime("2022-01-15T06:34:23Z") - assert transform({"list_": [dt1, dt2]}, DatetimeDict) == { # type: ignore[comparison-overlap] + assert await transform({"list_": [dt1, dt2]}, DatetimeDict, use_async) == { # type: ignore[comparison-overlap] "list_": ["2023-02-23T14:16:36.337692+00:00", "2022-01-15T06:34:23+00:00"] } -def test_datetime_custom_format() -> None: +@parametrize +@pytest.mark.asyncio +async def test_datetime_custom_format(use_async: bool) -> None: dt = parse_datetime("2022-01-15T06:34:23Z") - result = transform(dt, Annotated[datetime, PropertyInfo(format="custom", format_template="%H")]) + result = await transform(dt, Annotated[datetime, PropertyInfo(format="custom", format_template="%H")], use_async) assert result == "06" # type: ignore[comparison-overlap] @@ -187,56 +259,74 @@ class DateDictWithRequiredAlias(TypedDict, total=False): required_prop: Required[Annotated[date, PropertyInfo(format="iso8601", alias="prop")]] -def test_datetime_with_alias() -> None: - assert transform({"required_prop": None}, DateDictWithRequiredAlias) == {"prop": None} # type: ignore[comparison-overlap] - assert transform({"required_prop": date.fromisoformat("2023-02-23")}, DateDictWithRequiredAlias) == {"prop": "2023-02-23"} # type: ignore[comparison-overlap] +@parametrize +@pytest.mark.asyncio +async def test_datetime_with_alias(use_async: bool) -> None: + assert await transform({"required_prop": None}, DateDictWithRequiredAlias, use_async) == {"prop": None} # type: ignore[comparison-overlap] + assert await transform( + {"required_prop": date.fromisoformat("2023-02-23")}, DateDictWithRequiredAlias, use_async + ) == {"prop": "2023-02-23"} # type: ignore[comparison-overlap] class MyModel(BaseModel): foo: str -def test_pydantic_model_to_dictionary() -> None: - assert transform(MyModel(foo="hi!"), Any) == {"foo": "hi!"} - assert transform(MyModel.construct(foo="hi!"), Any) == {"foo": "hi!"} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_model_to_dictionary(use_async: bool) -> None: + assert cast(Any, await transform(MyModel(foo="hi!"), Any, use_async)) == {"foo": "hi!"} + assert cast(Any, await transform(MyModel.construct(foo="hi!"), Any, use_async)) == {"foo": "hi!"} -def test_pydantic_empty_model() -> None: - assert transform(MyModel.construct(), Any) == {} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_empty_model(use_async: bool) -> None: + assert cast(Any, await transform(MyModel.construct(), Any, use_async)) == {} -def test_pydantic_unknown_field() -> None: - assert transform(MyModel.construct(my_untyped_field=True), Any) == {"my_untyped_field": True} +@parametrize +@pytest.mark.asyncio +async def test_pydantic_unknown_field(use_async: bool) -> None: + assert cast(Any, await transform(MyModel.construct(my_untyped_field=True), Any, use_async)) == { + "my_untyped_field": True + } -def test_pydantic_mismatched_types() -> None: +@parametrize +@pytest.mark.asyncio +async def 
test_pydantic_mismatched_types(use_async: bool) -> None: model = MyModel.construct(foo=True) if PYDANTIC_V2: with pytest.warns(UserWarning): - params = transform(model, Any) + params = await transform(model, Any, use_async) else: - params = transform(model, Any) - assert params == {"foo": True} + params = await transform(model, Any, use_async) + assert cast(Any, params) == {"foo": True} -def test_pydantic_mismatched_object_type() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_mismatched_object_type(use_async: bool) -> None: model = MyModel.construct(foo=MyModel.construct(hello="world")) if PYDANTIC_V2: with pytest.warns(UserWarning): - params = transform(model, Any) + params = await transform(model, Any, use_async) else: - params = transform(model, Any) - assert params == {"foo": {"hello": "world"}} + params = await transform(model, Any, use_async) + assert cast(Any, params) == {"foo": {"hello": "world"}} class ModelNestedObjects(BaseModel): nested: MyModel -def test_pydantic_nested_objects() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_nested_objects(use_async: bool) -> None: model = ModelNestedObjects.construct(nested={"foo": "stainless"}) assert isinstance(model.nested, MyModel) - assert transform(model, Any) == {"nested": {"foo": "stainless"}} + assert cast(Any, await transform(model, Any, use_async)) == {"nested": {"foo": "stainless"}} class ModelWithDefaultField(BaseModel): @@ -245,21 +335,119 @@ class ModelWithDefaultField(BaseModel): with_str_default: str = "foo" -def test_pydantic_default_field() -> None: +@parametrize +@pytest.mark.asyncio +async def test_pydantic_default_field(use_async: bool) -> None: # should be excluded when defaults are used model = ModelWithDefaultField.construct() assert model.with_none_default is None assert model.with_str_default == "foo" - assert transform(model, Any) == {} + assert cast(Any, await transform(model, Any, use_async)) == {} # should be included when the default value is explicitly given model = ModelWithDefaultField.construct(with_none_default=None, with_str_default="foo") assert model.with_none_default is None assert model.with_str_default == "foo" - assert transform(model, Any) == {"with_none_default": None, "with_str_default": "foo"} + assert cast(Any, await transform(model, Any, use_async)) == {"with_none_default": None, "with_str_default": "foo"} # should be included when a non-default value is explicitly given model = ModelWithDefaultField.construct(with_none_default="bar", with_str_default="baz") assert model.with_none_default == "bar" assert model.with_str_default == "baz" - assert transform(model, Any) == {"with_none_default": "bar", "with_str_default": "baz"} + assert cast(Any, await transform(model, Any, use_async)) == {"with_none_default": "bar", "with_str_default": "baz"} + + +class TypedDictIterableUnion(TypedDict): + foo: Annotated[Union[Bar8, Iterable[Baz8]], PropertyInfo(alias="FOO")] + + +class Bar8(TypedDict): + foo_bar: Annotated[str, PropertyInfo(alias="fooBar")] + + +class Baz8(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + +@parametrize +@pytest.mark.asyncio +async def test_iterable_of_dictionaries(use_async: bool) -> None: + assert await transform({"foo": [{"foo_baz": "bar"}]}, TypedDictIterableUnion, use_async) == { + "FOO": [{"fooBaz": "bar"}] + } + assert cast(Any, await transform({"foo": ({"foo_baz": "bar"},)}, TypedDictIterableUnion, use_async)) == { + "FOO": [{"fooBaz": "bar"}] + } + + def my_iter() -> Iterable[Baz8]: + yield {"foo_baz": 
"hello"} + yield {"foo_baz": "world"} + + assert await transform({"foo": my_iter()}, TypedDictIterableUnion, use_async) == { + "FOO": [{"fooBaz": "hello"}, {"fooBaz": "world"}] + } + + +@parametrize +@pytest.mark.asyncio +async def test_dictionary_items(use_async: bool) -> None: + class DictItems(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}} + + +class TypedDictIterableUnionStr(TypedDict): + foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")] + + +@parametrize +@pytest.mark.asyncio +async def test_iterable_union_str(use_async: bool) -> None: + assert await transform({"foo": "bar"}, TypedDictIterableUnionStr, use_async) == {"FOO": "bar"} + assert cast(Any, await transform(iter([{"foo_baz": "bar"}]), Union[str, Iterable[Baz8]], use_async)) == [ + {"fooBaz": "bar"} + ] + + +class TypedDictBase64Input(TypedDict): + foo: Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")] + + +@parametrize +@pytest.mark.asyncio +async def test_base64_file_input(use_async: bool) -> None: + # strings are left as-is + assert await transform({"foo": "bar"}, TypedDictBase64Input, use_async) == {"foo": "bar"} + + # pathlib.Path is automatically converted to base64 + assert await transform({"foo": SAMPLE_FILE_PATH}, TypedDictBase64Input, use_async) == { + "foo": "SGVsbG8sIHdvcmxkIQo=" + } # type: ignore[comparison-overlap] + + # io instances are automatically converted to base64 + assert await transform({"foo": io.StringIO("Hello, world!")}, TypedDictBase64Input, use_async) == { + "foo": "SGVsbG8sIHdvcmxkIQ==" + } # type: ignore[comparison-overlap] + assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == { + "foo": "SGVsbG8sIHdvcmxkIQ==" + } # type: ignore[comparison-overlap] + + +@parametrize +@pytest.mark.asyncio +async def test_transform_skipping(use_async: bool) -> None: + # lists of ints are left as-is + data = [1, 2, 3] + assert await transform(data, List[int], use_async) is data + + # iterables of ints are converted to a list + data = iter([1, 2, 3]) + assert await transform(data, Iterable[int], use_async) == [1, 2, 3] + + +@parametrize +@pytest.mark.asyncio +async def test_strips_notgiven(use_async: bool) -> None: + assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"} + assert await transform({"foo_bar": NOT_GIVEN}, Foo1, use_async) == {} diff --git a/tests/test_utils/test_logging.py b/tests/test_utils/test_logging.py new file mode 100644 index 0000000000..cc018012e2 --- /dev/null +++ b/tests/test_utils/test_logging.py @@ -0,0 +1,100 @@ +import logging +from typing import Any, Dict, cast + +import pytest + +from openai._utils import SensitiveHeadersFilter + + +@pytest.fixture +def logger_with_filter() -> logging.Logger: + logger = logging.getLogger("test_logger") + logger.setLevel(logging.DEBUG) + logger.addFilter(SensitiveHeadersFilter()) + return logger + + +def test_keys_redacted(logger_with_filter: logging.Logger, caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.DEBUG): + logger_with_filter.debug( + "Request options: %s", + { + "method": "post", + "url": "chat/completions", + "headers": {"api-key": "12345", "Authorization": "Bearer token"}, + }, + ) + + log_record = cast(Dict[str, Any], caplog.records[0].args) + assert log_record["method"] == "post" + assert log_record["url"] == "chat/completions" + assert log_record["headers"]["api-key"] 
== "" + assert log_record["headers"]["Authorization"] == "" + assert ( + caplog.messages[0] + == "Request options: {'method': 'post', 'url': 'chat/completions', 'headers': {'api-key': '', 'Authorization': ''}}" + ) + + +def test_keys_redacted_case_insensitive(logger_with_filter: logging.Logger, caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.DEBUG): + logger_with_filter.debug( + "Request options: %s", + { + "method": "post", + "url": "chat/completions", + "headers": {"Api-key": "12345", "authorization": "Bearer token"}, + }, + ) + + log_record = cast(Dict[str, Any], caplog.records[0].args) + assert log_record["method"] == "post" + assert log_record["url"] == "chat/completions" + assert log_record["headers"]["Api-key"] == "" + assert log_record["headers"]["authorization"] == "" + assert ( + caplog.messages[0] + == "Request options: {'method': 'post', 'url': 'chat/completions', 'headers': {'Api-key': '', 'authorization': ''}}" + ) + + +def test_no_headers(logger_with_filter: logging.Logger, caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.DEBUG): + logger_with_filter.debug( + "Request options: %s", + {"method": "post", "url": "chat/completions"}, + ) + + log_record = cast(Dict[str, Any], caplog.records[0].args) + assert log_record["method"] == "post" + assert log_record["url"] == "chat/completions" + assert "api-key" not in log_record + assert "Authorization" not in log_record + assert caplog.messages[0] == "Request options: {'method': 'post', 'url': 'chat/completions'}" + + +def test_headers_without_sensitive_info(logger_with_filter: logging.Logger, caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.DEBUG): + logger_with_filter.debug( + "Request options: %s", + { + "method": "post", + "url": "chat/completions", + "headers": {"custom": "value"}, + }, + ) + + log_record = cast(Dict[str, Any], caplog.records[0].args) + assert log_record["method"] == "post" + assert log_record["url"] == "chat/completions" + assert log_record["headers"] == {"custom": "value"} + assert ( + caplog.messages[0] + == "Request options: {'method': 'post', 'url': 'chat/completions', 'headers': {'custom': 'value'}}" + ) + + +def test_standard_debug_msg(logger_with_filter: logging.Logger, caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.DEBUG): + logger_with_filter.debug("Sending HTTP Request: %s %s", "POST", "chat/completions") + assert caplog.messages[0] == "Sending HTTP Request: POST chat/completions" diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py new file mode 100644 index 0000000000..2b5ff19dab --- /dev/null +++ b/tests/test_utils/test_proxy.py @@ -0,0 +1,35 @@ +import operator +from typing import Any +from typing_extensions import override + +from openai._utils import LazyProxy +from openai._extras._common import MissingDependencyError + + +class RecursiveLazyProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + return self + + def __call__(self, *_args: Any, **_kwds: Any) -> Any: + raise RuntimeError("This should never be called!") + + +def test_recursive_proxy() -> None: + proxy = RecursiveLazyProxy() + assert repr(proxy) == "RecursiveLazyProxy" + assert str(proxy) == "RecursiveLazyProxy" + assert dir(proxy) == [] + assert type(proxy).__name__ == "RecursiveLazyProxy" + assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class MissingDepsProxy(LazyProxy[Any]): + @override + def 
__load__(self) -> Any: + raise MissingDependencyError("Mocking missing dependency") + + proxy = MissingDepsProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy) diff --git a/tests/test_utils/test_typing.py b/tests/test_utils/test_typing.py new file mode 100644 index 0000000000..535935b9e1 --- /dev/null +++ b/tests/test_utils/test_typing.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from typing import Generic, TypeVar, cast + +from openai._utils import extract_type_var_from_base + +_T = TypeVar("_T") +_T2 = TypeVar("_T2") +_T3 = TypeVar("_T3") + + +class BaseGeneric(Generic[_T]): ... + + +class SubclassGeneric(BaseGeneric[_T]): ... + + +class BaseGenericMultipleTypeArgs(Generic[_T, _T2, _T3]): ... + + +class SubclassGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T, _T2, _T3]): ... + + +class SubclassDifferentOrderGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T2, _T, _T3]): ... + + +def test_extract_type_var() -> None: + assert ( + extract_type_var_from_base( + BaseGeneric[int], + index=0, + generic_bases=cast("tuple[type, ...]", (BaseGeneric,)), + ) + == int + ) + + +def test_extract_type_var_generic_subclass() -> None: + assert ( + extract_type_var_from_base( + SubclassGeneric[int], + index=0, + generic_bases=cast("tuple[type, ...]", (BaseGeneric,)), + ) + == int + ) + + +def test_extract_type_var_multiple() -> None: + typ = BaseGenericMultipleTypeArgs[int, str, None] + + generic_bases = cast("tuple[type, ...]", (BaseGenericMultipleTypeArgs,)) + assert extract_type_var_from_base(typ, index=0, generic_bases=generic_bases) == int + assert extract_type_var_from_base(typ, index=1, generic_bases=generic_bases) == str + assert extract_type_var_from_base(typ, index=2, generic_bases=generic_bases) == type(None) + + +def test_extract_type_var_generic_subclass_multiple() -> None: + typ = SubclassGenericMultipleTypeArgs[int, str, None] + + generic_bases = cast("tuple[type, ...]", (BaseGenericMultipleTypeArgs,)) + assert extract_type_var_from_base(typ, index=0, generic_bases=generic_bases) == int + assert extract_type_var_from_base(typ, index=1, generic_bases=generic_bases) == str + assert extract_type_var_from_base(typ, index=2, generic_bases=generic_bases) == type(None) + + +def test_extract_type_var_generic_subclass_different_ordering_multiple() -> None: + typ = SubclassDifferentOrderGenericMultipleTypeArgs[int, str, None] + + generic_bases = cast("tuple[type, ...]", (BaseGenericMultipleTypeArgs,)) + assert extract_type_var_from_base(typ, index=0, generic_bases=generic_bases) == int + assert extract_type_var_from_base(typ, index=1, generic_bases=generic_bases) == str + assert extract_type_var_from_base(typ, index=2, generic_bases=generic_bases) == type(None) diff --git a/tests/utils.py b/tests/utils.py index b513794017..4cf5ce171b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,20 +1,36 @@ from __future__ import annotations +import io import os +import inspect import traceback import contextlib -from typing import Any, TypeVar, Iterator, cast +from typing import Any, TypeVar, Iterator, ForwardRef, cast from datetime import date, datetime from typing_extensions import Literal, get_args, get_origin, assert_type -from openai._types import NoneType -from openai._utils import is_dict, is_list, is_list_type, is_union_type +import rich + +from openai._types import Omit, NoneType +from openai._utils import ( + is_dict, + is_list, + is_list_type, + is_union_type, + extract_type_arg, + is_annotated_type, + is_type_alias_type, +) from 
openai._compat import PYDANTIC_V2, field_outer_type, get_model_fields from openai._models import BaseModel BaseModelT = TypeVar("BaseModelT", bound=BaseModel) +def evaluate_forwardref(forwardref: ForwardRef, globalns: dict[str, Any]) -> type: + return eval(str(forwardref), globalns) # type: ignore[no-any-return] + + def assert_matches_model(model: type[BaseModelT], value: BaseModelT, *, path: list[str]) -> bool: for name, field in get_model_fields(model).items(): field_value = getattr(value, name) @@ -43,6 +59,13 @@ def assert_matches_type( path: list[str], allow_none: bool = False, ) -> None: + if is_type_alias_type(type_): + type_ = type_.__value__ + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + type_ = extract_type_arg(type_, 0) + if allow_none and value is None: return @@ -63,6 +86,8 @@ def assert_matches_type( assert isinstance(value, bool) elif origin == float: assert isinstance(value, float) + elif origin == bytes: + assert isinstance(value, bytes) elif origin == datetime: assert isinstance(value, datetime) elif origin == date: @@ -83,7 +108,22 @@ def assert_matches_type( assert_matches_type(key_type, key, path=[*path, ""]) assert_matches_type(items_type, item, path=[*path, ""]) elif is_union_type(type_): - for i, variant in enumerate(get_args(type_)): + variants = get_args(type_) + + try: + none_index = variants.index(type(None)) + except ValueError: + pass + else: + # special case Optional[T] for better error messages + if len(variants) == 2: + if value is None: + # valid + return + + return assert_matches_type(type_=variants[not none_index], value=value, path=path) + + for i, variant in enumerate(variants): try: assert_matches_type(variant, value, path=[*path, f"variant {i}"]) return @@ -91,10 +131,12 @@ def assert_matches_type( traceback.print_exc() continue - assert False, "Did not match any variants" + raise AssertionError("Did not match any variants") elif issubclass(origin, BaseModel): assert isinstance(value, type_) assert assert_matches_model(type_, cast(Any, value), path=path) + elif inspect.isclass(origin) and origin.__name__ == "HttpxBinaryResponseContent": + assert value.__class__.__name__ == "HttpxBinaryResponseContent" else: assert None, f"Unhandled field type: {type_}" @@ -107,12 +149,26 @@ def _assert_list_type(type_: type[object], value: object) -> None: assert_type(inner_type, entry) # type: ignore +def rich_print_str(obj: object) -> str: + """Like `rich.print()` but returns the string instead""" + buf = io.StringIO() + + console = rich.console.Console(file=buf, width=120) + console.print(obj) + + return buf.getvalue() + + @contextlib.contextmanager -def update_env(**new_env: str) -> Iterator[None]: +def update_env(**new_env: str | Omit) -> Iterator[None]: old = os.environ.copy() try: - os.environ.update(new_env) + for name, value in new_env.items(): + if isinstance(value, Omit): + os.environ.pop(name, None) + else: + os.environ[name] = value yield None finally: