import os

import sentry_sdk
from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration

from huggingface_hub import InferenceClient


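# Tool schema advertised to the model: a single get_weather function
# the model may choose to call via tool_choice="auto".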
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current temperature for a given location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City and country e.g. Paris, France",
                    }
                },
                "required": ["location"],
            },
        },
    }
]


def main():
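    # Initialize Sentry with tracing enabled; include_prompts=True lets the
    # integration record prompts and responses in spans (alongside send_default_pii=True).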
    sentry_sdk.init(
        dsn=os.getenv("SENTRY_DSN"),
        environment=os.getenv("ENV", "local"),
        traces_sample_rate=1.0,
        send_default_pii=True,
        debug=True,
        integrations=[
            HuggingfaceHubIntegration(include_prompts=True),
        ],
    )

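    # Model checkpoint and inference provider used for both example requests.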
    model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    provider = "featherless-ai"

    client = InferenceClient(
        token=os.getenv("HF_TOKEN"),
        provider=provider,
    )

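    # Native huggingface_hub API: one chat completion with the tool schema attached.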
    response = client.chat_completion(
        model=model,
        messages=[
            {
                "role": "user",
                "content": "What's the weather like the next 3 days in London, UK?",
            }
        ],
        tools=TOOLS,
        tool_choice="auto",
        frequency_penalty=0.1,
        max_tokens=50,
        presence_penalty=0.2,
        top_p=0.9,
        temperature=0.2,
        stream=False,
    )
    print("--------------------------------")
    print("Output client.chat_completion:")
    print(response)

    # The same request through the OpenAI-compatible interface exposed by InferenceClient.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Count to 10"},
        ],
        tools=TOOLS,
        tool_choice="auto",
        frequency_penalty=0.1,
        max_tokens=50,
        presence_penalty=0.2,
        top_p=0.9,
        temperature=0.2,
        stream=False,
    )
    print("--------------------------------")
    print("Output client.chat.completions.create:")
    print(response)


if __name__ == "__main__":
    main()