Commit 28eeb1b

huggingface text_generation with local model server
1 parent 37f77b3 commit 28eeb1b

File tree

7 files changed: +1666 −2 lines changed

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
Local text generation server that mimics the Hugging Face Inference API.
This allows you to use InferenceClient with a local model.
"""

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import uvicorn
from typing import Dict, Any, Optional


class TextGenerationRequest(BaseModel):
    inputs: str
    parameters: Optional[Dict[str, Any]] = {}


class TextGenerationResponse(BaseModel):
    generated_text: str


class LocalTextGenerationServer:
    def __init__(self, model_name: str = "gpt2"):
        print(f"🔄 Loading model: {model_name}")
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        print("✅ Model loaded successfully!")

    def generate_text(self, prompt: str, parameters: Dict[str, Any] = None) -> str:
        if parameters is None:
            parameters = {}

        # Default parameters
        max_new_tokens = parameters.get("max_new_tokens", 50)
        temperature = parameters.get("temperature", 0.7)
        do_sample = parameters.get("do_sample", True)

        # Tokenize
        inputs = self.tokenizer.encode(prompt, return_tensors="pt")

        # Generate
        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                do_sample=do_sample,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
            )

        # Decode
        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Return full text (like the HF API does)
        return generated_text


# Initialize the model server
print("🚀 Starting Local Text Generation Server...")
text_gen_server = LocalTextGenerationServer()

# Create FastAPI app
app = FastAPI(title="Local Text Generation API", version="1.0.0")


@app.get("/")
async def root():
    return {"message": "Local Text Generation Server", "status": "running"}


@app.post("/")
async def generate_text_endpoint(request: TextGenerationRequest):
    """
    Main text generation endpoint that mimics the HuggingFace Inference API format.
    """
    try:
        generated_text = text_gen_server.generate_text(
            request.inputs,
            request.parameters,
        )

        # Return in HF API format (list with generated_text)
        return [{"generated_text": generated_text}]

    except Exception as e:
        return {"error": str(e)}


@app.post("/generate")
async def generate_text_simple(request: TextGenerationRequest):
    """
    Alternative endpoint with a simpler response format.
    """
    try:
        generated_text = text_gen_server.generate_text(
            request.inputs,
            request.parameters,
        )

        return {"generated_text": generated_text}

    except Exception as e:
        return {"error": str(e)}


if __name__ == "__main__":
    print("🌐 Server will be available at: http://localhost:8000")
    print("📝 Test endpoint: POST http://localhost:8000/")
    print("📚 API docs: http://localhost:8000/docs")
    print("🛑 Press Ctrl+C to stop the server")

    uvicorn.run(
        "local_server:app",
        host="127.0.0.1",
        port=8000,
        reload=False,
        log_level="info",
    )
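
Note (not part of this commit): once the server is running, the root endpoint can be exercised directly with a plain HTTP POST. The sketch below assumes the default address printed by the script (http://localhost:8000) and the third-party requests library, which is not listed in the project's dependencies.

# Hypothetical smoke test for the local server above (assumes it is already
# running on http://localhost:8000 and that `requests` is installed).
import requests

payload = {
    "inputs": "The sky is",
    "parameters": {"max_new_tokens": 20, "temperature": 0.7, "do_sample": True},
}

# The root endpoint mimics the HF Inference API: on success it returns a list
# of {"generated_text": ...} objects.
resp = requests.post("http://localhost:8000/", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()[0]["generated_text"])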
Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
import os
import time

import sentry_sdk
from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration

from huggingface_hub import InferenceClient


def main():
    sentry_sdk.init(
        dsn=os.getenv("SENTRY_DSN", None),
        environment=os.getenv("ENV", "local"),
        traces_sample_rate=1.0,
        send_default_pii=True,
        debug=True,
        integrations=[
            HuggingfaceHubIntegration(include_prompts=True),
        ],
    )

    # Connect to local text generation server
    local_server_url = "http://localhost:8000"

    print(f"🔄 Connecting to local server: {local_server_url}")
    print("📝 Make sure to start the local server first:")
    print("    ./run_server.sh")
    print()

    with sentry_sdk.start_transaction(name="huggingface-hub-text-generation"):
        client = InferenceClient(model=local_server_url)

        prompt = "The sky is"
        try:
            print(f"🔄 Test: Generating text for '{prompt}'")

            # Use the real InferenceClient with the local server
            response = client.text_generation(
                prompt,
                max_new_tokens=40,
                temperature=0.7,
                do_sample=True,
            )

            print("✅ Success!")
            print(f"    Prompt: {prompt}")
            print(f"    Generated: {response}")
            print()

        except Exception as e:
            print(f"❌ Failed: {type(e).__name__}: {e}")
            print("💡 Make sure the local server is running:")
            print("    ./run_server.sh")
            print()


if __name__ == "__main__":
    main()
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
[project]
name = "test"
version = "0"
requires-python = ">=3.12"

dependencies = [
    "fastapi>=0.116.1",
    "huggingface-hub[inference]==0.22.0",
    "ipdb>=0.13.13",
    "sentry-sdk",
    "text-generation>=0.7.0",
    "torch>=2.8.0",
    "transformers>=4.40.2",
    "uvicorn>=0.35.0",
]

[tool.uv.sources]
sentry-sdk = { path = "../../sentry-python", editable = true }
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

# exit on first error
set -euo pipefail

# Install uv if it's not installed
if ! command -v uv &> /dev/null; then
    curl -LsSf https://astral.sh/uv/install.sh | sh
fi

# Run the script
uv run python main.py
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# exit on first error
set -euo pipefail

# Install uv if it's not installed
if ! command -v uv &> /dev/null; then
    curl -LsSf https://astral.sh/uv/install.sh | sh
fi

echo "Starting local text generation server accessible via huggingface_hub..."

# Run text generation server, accessible via huggingface_hub
uv run python local_server.py
