From 9e9314d2371cebe0ef9ad3422455c6671350f715 Mon Sep 17 00:00:00 2001 From: Emily S Date: Fri, 11 Apr 2025 15:52:04 +0200 Subject: [PATCH 1/5] Add gen_ai.request.choice.count to openai instrumentation (#75) * Add gen_ai.request.choice.count to openai instrumentation * Add CHANGELOG entry * Fix formatting * CHANGELOG is updated when there's a release, remove CHANGELOG entry * Only set choice count if n != 1 * Update instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py Co-authored-by: Riccardo Magliocchetti --------- Co-authored-by: Riccardo Magliocchetti --- .../instrumentation/openai/helpers.py | 3 + .../tests/cassettes/test_chat_n_1.yaml | 138 ++++++++++++++++++ .../tests/test_beta_chat_completions.py | 2 + .../tests/test_chat_completions.py | 24 +++ 4 files changed, 167 insertions(+) create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_n_1.yaml diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py index 98d862f..82173a2 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py @@ -25,6 +25,7 @@ GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, @@ -141,6 +142,8 @@ def _is_set(value): if client := getattr(instance, "_client", None): span_attributes.update(_attributes_from_client(client)) + if _is_set(choice_count := kwargs.get("n")) and choice_count != 1: + span_attributes[GEN_AI_REQUEST_CHOICE_COUNT] = choice_count if _is_set(frequency_penalty := kwargs.get("frequency_penalty")): span_attributes[GEN_AI_REQUEST_FREQUENCY_PENALTY] = frequency_penalty if _is_set(max_tokens := kwargs.get("max_completion_tokens", kwargs.get("max_tokens"))): diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_n_1.yaml b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_n_1.yaml new file mode 100644 index 0000000..ded2949 --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_n_1.yaml @@ -0,0 +1,138 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains Bouvet Island?" + } + ], + "model": "gpt-4o-mini", + "n": 1 + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '139' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.0 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-BL8K8arBjCHMDOxqQd5YGBeYphZGG", + "object": "chat.completion", + "created": 1744376584, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Atlantic Ocean.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 22, + "completion_tokens": 5, + "total_tokens": 27, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_44added55e" + } + headers: + CF-RAY: + - 92eaae915b15e51d-TXL + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 11 Apr 2025 13:03:04 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '827' + openai-organization: test_openai_org_id + openai-processing-ms: + - '170' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '200' + x-ratelimit-limit-tokens: + - '100000' + x-ratelimit-remaining-requests: + - '197' + x-ratelimit-remaining-tokens: + - '99925' + x-ratelimit-reset-requests: + - 19m29.225s + x-ratelimit-reset-tokens: + - 32m9.545s + x-request-id: + - req_5ec52b920fea0d555ec3dbf813300fad + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py index 383327a..91b775a 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py @@ -33,6 +33,7 @@ GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, @@ -330,6 +331,7 @@ def test_chat_multiple_choices_with_capture_message_content( address, port = address_and_port(client) assert dict(span.attributes) == { GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_CHOICE_COUNT: 2, GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_ID: "chatcmpl-AfhuHpVEbcYGlsFuHOP60MtU4tIq9", diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 5ce1a35..6a68e19 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -32,6 +32,7 @@ GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, @@ -130,6 +131,28 @@ def test_chat(default_openai_env, trace_exporter, metrics_reader, logs_exporter) ) +@pytest.mark.vcr() +def test_chat_n_1(default_openai_env, trace_exporter, metrics_reader, logs_exporter): + client = openai.OpenAI() + + messages = [ + { + "role": "user", + "content": TEST_CHAT_INPUT, + } + ] + + chat_completion = client.chat.completions.create(model=TEST_CHAT_MODEL, messages=messages, n=1) + + assert chat_completion.choices[0].message.content == "Atlantic Ocean." + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert GEN_AI_REQUEST_CHOICE_COUNT not in span.attributes + + @pytest.mark.skipif(OPENAI_VERSION < (1, 8, 0), reason="LegacyAPIResponse available") @pytest.mark.vcr() def test_chat_with_raw_response(default_openai_env, trace_exporter, metrics_reader, logs_exporter): @@ -471,6 +494,7 @@ def test_chat_multiple_choices_with_capture_message_content( address, port = address_and_port(client) assert dict(span.attributes) == { GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_CHOICE_COUNT: 2, GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, GEN_AI_SYSTEM: "openai", GEN_AI_RESPONSE_ID: "chatcmpl-AfhuHpVEbcYGlsFuHOP60MtU4tIq9", From 60302847dd2d5a71a8601b826dc023fd6a9132a8 Mon Sep 17 00:00:00 2001 From: Srdjan Lulic Date: Mon, 14 Apr 2025 17:39:13 +0100 Subject: [PATCH 2/5] openai: Use "gen_ai.request.seed" instead of deprecated "gen_ai.openai.request.seed" attribute. (#76) --- .../src/opentelemetry/instrumentation/openai/helpers.py | 4 ++-- .../tests/test_beta_chat_completions.py | 4 ++-- .../tests/test_chat_completions.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py index 82173a2..1d75555 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py @@ -21,7 +21,6 @@ from opentelemetry._events import Event, EventLogger from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, - GEN_AI_OPENAI_REQUEST_SEED, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, @@ -30,6 +29,7 @@ GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, GEN_AI_REQUEST_PRESENCE_PENALTY, + GEN_AI_REQUEST_SEED, GEN_AI_REQUEST_STOP_SEQUENCES, GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_P, @@ -159,7 +159,7 @@ def _is_set(value): stop_sequences = [stop_sequences] span_attributes[GEN_AI_REQUEST_STOP_SEQUENCES] = stop_sequences if _is_set(seed := kwargs.get("seed")): - span_attributes[GEN_AI_OPENAI_REQUEST_SEED] = seed + span_attributes[GEN_AI_REQUEST_SEED] = seed if _is_set(service_tier := kwargs.get("service_tier")): span_attributes[GEN_AI_OPENAI_REQUEST_SERVICE_TIER] = service_tier if _is_set(response_format := kwargs.get("response_format")): diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py index 91b775a..a746cad 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py @@ -29,7 +29,6 @@ from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, - GEN_AI_OPENAI_REQUEST_SEED, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, @@ -38,6 +37,7 @@ GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, GEN_AI_REQUEST_PRESENCE_PENALTY, + GEN_AI_REQUEST_SEED, GEN_AI_REQUEST_STOP_SEQUENCES, GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_P, @@ -246,7 +246,7 @@ def test_chat_all_the_client_options(default_openai_env, trace_exporter, metrics address, port = address_and_port(client) expected_attrs = { - GEN_AI_OPENAI_REQUEST_SEED: 100, + GEN_AI_REQUEST_SEED: 100, GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index 6a68e19..b3dc314 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -28,7 +28,6 @@ from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, - GEN_AI_OPENAI_REQUEST_SEED, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, @@ -37,6 +36,7 @@ GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, GEN_AI_REQUEST_PRESENCE_PENALTY, + GEN_AI_REQUEST_SEED, GEN_AI_REQUEST_STOP_SEQUENCES, GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_P, @@ -330,7 +330,7 @@ def test_chat_all_the_client_options(default_openai_env, trace_exporter, metrics address, port = address_and_port(client) expected_attrs = { - GEN_AI_OPENAI_REQUEST_SEED: 100, + GEN_AI_REQUEST_SEED: 100, GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", @@ -1203,7 +1203,7 @@ def test_chat_stream_all_the_client_options(default_openai_env, trace_exporter, address, port = address_and_port(client) expected_attrs = { - GEN_AI_OPENAI_REQUEST_SEED: 100, + GEN_AI_REQUEST_SEED: 100, GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", From 497100743b9f3596ba447749c693eec0c39ed3b8 Mon Sep 17 00:00:00 2001 From: Srdjan Lulic Date: Wed, 16 Apr 2025 12:53:00 +0100 Subject: [PATCH 3/5] openai: Use semconv-1.31.0 (#77) * Use "gen_ai.output.type" instead of deprecated "gen_ai.openai.request.response_format" attribute. * Use json instead of json_schema and json_object for gen_ai.output.type attribute value * Bump schema version number --- .../opentelemetry/instrumentation/openai/__init__.py | 4 ++-- .../opentelemetry/instrumentation/openai/helpers.py | 11 +++++++---- .../tests/test_beta_chat_completions.py | 12 ++++++------ .../tests/test_chat_completions.py | 10 +++++----- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/__init__.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/__init__.py index c1d312d..973e6eb 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/__init__.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/__init__.py @@ -87,14 +87,14 @@ def _instrument(self, **kwargs): __name__, __version__, tracer_provider, - schema_url=Schemas.V1_28_0.value, + schema_url=Schemas.V1_31_0.value, ) meter_provider = kwargs.get("meter_provider") self.meter = get_meter( __name__, __version__, meter_provider, - schema_url=Schemas.V1_28_0.value, + schema_url=Schemas.V1_31_0.value, ) event_logger_provider = kwargs.get("event_logger_provider") self.event_logger = get_event_logger(__name__, event_logger_provider) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py index 1d75555..f497481 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py @@ -20,10 +20,10 @@ from opentelemetry._events import Event, EventLogger from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_TYPE, GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, @@ -166,13 +166,16 @@ def _is_set(value): # response_format may be string or object with a string in the `type` key if isinstance(response_format, Mapping): if _is_set(response_format_type := response_format.get("type")): - span_attributes[GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT] = response_format_type + if response_format_type in ("json_object", "json_schema"): + span_attributes[GEN_AI_OUTPUT_TYPE] = "json" + else: + span_attributes[GEN_AI_OUTPUT_TYPE] = response_format_type elif isinstance(response_format, str): - span_attributes[GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT] = response_format + span_attributes[GEN_AI_OUTPUT_TYPE] = response_format else: # Assume structured output lazily parsed to a schema via type_to_response_format_param or similar. # e.g. pydantic._internal._model_construction.ModelMetaclass - span_attributes[GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT] = "json_schema" + span_attributes[GEN_AI_OUTPUT_TYPE] = "json" return span_attributes diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py index a746cad..6bdbc62 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_beta_chat_completions.py @@ -28,10 +28,10 @@ from opentelemetry._logs import LogRecord from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_TYPE, GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, @@ -248,7 +248,7 @@ def test_chat_all_the_client_options(default_openai_env, trace_exporter, metrics expected_attrs = { GEN_AI_REQUEST_SEED: 100, GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", + GEN_AI_OUTPUT_TYPE: "text", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", GEN_AI_OPERATION_NAME: "chat", GEN_AI_REQUEST_FREQUENCY_PENALTY: 0, @@ -1502,14 +1502,14 @@ def test_chat_exported_schema_version(default_openai_env, trace_exporter, metric spans = trace_exporter.get_finished_spans() (span,) = spans - assert span.instrumentation_scope.schema_url == "https://opentelemetry.io/schemas/1.28.0" + assert span.instrumentation_scope.schema_url == "https://opentelemetry.io/schemas/1.31.0" metrics_data = metrics_reader.get_metrics_data() resource_metrics = metrics_data.resource_metrics for metrics in resource_metrics: for scope_metrics in metrics.scope_metrics: - assert scope_metrics.schema_url == "https://opentelemetry.io/schemas/1.28.0" + assert scope_metrics.schema_url == "https://opentelemetry.io/schemas/1.31.0" @pytest.mark.skipif(OPENAI_VERSION < (1, 40, 0), reason="beta completions added in 1.40.0") @@ -1545,7 +1545,7 @@ def test_parse_response_format_json_object_with_capture_message_content( address, port = address_and_port(client) assert dict(span.attributes) == { GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "json_object", + GEN_AI_OUTPUT_TYPE: "json", GEN_AI_OPERATION_NAME: "chat", GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, GEN_AI_SYSTEM: "openai", @@ -1620,7 +1620,7 @@ def test_parse_response_format_structured_output_with_capture_message_content( address, port = address_and_port(client) assert dict(span.attributes) == { GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "json_schema", + GEN_AI_OUTPUT_TYPE: "json", GEN_AI_OPERATION_NAME: "chat", GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, GEN_AI_SYSTEM: "openai", diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index b3dc314..bc4fd54 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -27,10 +27,10 @@ from opentelemetry._logs import LogRecord from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT, GEN_AI_OPENAI_REQUEST_SERVICE_TIER, GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_TYPE, GEN_AI_REQUEST_CHOICE_COUNT, GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, @@ -332,7 +332,7 @@ def test_chat_all_the_client_options(default_openai_env, trace_exporter, metrics expected_attrs = { GEN_AI_REQUEST_SEED: 100, GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", + GEN_AI_OUTPUT_TYPE: "text", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", GEN_AI_OPERATION_NAME: "chat", GEN_AI_REQUEST_FREQUENCY_PENALTY: 0, @@ -1204,7 +1204,7 @@ def test_chat_stream_all_the_client_options(default_openai_env, trace_exporter, address, port = address_and_port(client) expected_attrs = { GEN_AI_REQUEST_SEED: 100, - GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT: "text", + GEN_AI_OUTPUT_TYPE: "text", GEN_AI_OPENAI_REQUEST_SERVICE_TIER: "default", GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", GEN_AI_OPERATION_NAME: "chat", @@ -2444,14 +2444,14 @@ def test_chat_exported_schema_version(default_openai_env, trace_exporter, metric spans = trace_exporter.get_finished_spans() (span,) = spans - assert span.instrumentation_scope.schema_url == "https://opentelemetry.io/schemas/1.28.0" + assert span.instrumentation_scope.schema_url == "https://opentelemetry.io/schemas/1.31.0" metrics_data = metrics_reader.get_metrics_data() resource_metrics = metrics_data.resource_metrics for metrics in resource_metrics: for scope_metrics in metrics.scope_metrics: - assert scope_metrics.schema_url == "https://opentelemetry.io/schemas/1.28.0" + assert scope_metrics.schema_url == "https://opentelemetry.io/schemas/1.31.0" @dataclass From bee16d462b225e4875bfdbc06adf9625c167fd45 Mon Sep 17 00:00:00 2001 From: Srdjan Lulic Date: Wed, 23 Apr 2025 10:30:37 +0100 Subject: [PATCH 4/5] openai: Fix missing or double spans when completion stream is used with context manager (#80) * openai: Add conditional span closure logic depending on whether streamed completion uses context manager or not * openai: Add conditional span closure logic for async streaming completion * openai: Guard span ending based on the flag rather than the context manager --- .../instrumentation/openai/wrappers.py | 18 +++ ...test_chat_stream_with_context_manager.yaml | 114 +++++++++++++++++ .../tests/test_chat_completions.py | 117 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_stream_with_context_manager.yaml diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py index 8256822..62ec1c6 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py @@ -67,8 +67,13 @@ def __init__( self.choices = [] self.usage = None self.service_tier = None + self.ended = False def end(self, exc=None): + if self.ended: + return + + self.ended = True if exc is not None: self.span.set_status(StatusCode.ERROR, str(exc)) self.span.set_attribute(ERROR_TYPE, exc.__class__.__qualname__) @@ -111,6 +116,12 @@ def process_chunk(self, chunk): if hasattr(chunk, "service_tier"): self.service_tier = chunk.service_tier + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.end(exc_value) + def __iter__(self): stream = self.__wrapped__ try: @@ -124,6 +135,13 @@ def __iter__(self): raise self.end() + async def __aenter__(self): + # No difference in behavior between sync and async context manager + return self.__enter__() + + async def __aexit__(self, exc_type, exc_value, traceback): + self.__exit__(exc_type, exc_value, traceback) + async def __aiter__(self): stream = self.__wrapped__ try: diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_stream_with_context_manager.yaml b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_stream_with_context_manager.yaml new file mode 100644 index 0000000..0b1827d --- /dev/null +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/cassettes/test_chat_stream_with_context_manager.yaml @@ -0,0 +1,114 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Answer in up to 3 words: Which ocean contains Bouvet Island?" + } + ], + "model": "gpt-4o-mini", + "stream": true + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '147' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.6 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |+ + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":"South"},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":" Atlantic"},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":" Ocean"},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + data: [DONE] + + headers: + CF-RAY: + - 933c86cb9ae5773e-LHR + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 21 Apr 2025 11:26:28 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: test_openai_org_id + openai-processing-ms: + - '460' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '200' + x-ratelimit-limit-tokens: + - '100000' + x-ratelimit-remaining-requests: + - '199' + x-ratelimit-remaining-tokens: + - '88447' + x-ratelimit-reset-requests: + - 7m12s + x-ratelimit-reset-tokens: + - 83h10m39.057s + x-request-id: + - req_a39b652ec0ccb45a968e10112e569bf4 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py index bc4fd54..5fbf820 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py @@ -1103,6 +1103,62 @@ def test_chat_stream(default_openai_env, trace_exporter, metrics_reader, logs_ex ) +@pytest.mark.vcr() +def test_chat_stream_with_context_manager(default_openai_env, trace_exporter, metrics_reader, logs_exporter): + client = openai.OpenAI() + + messages = [ + { + "role": "user", + "content": TEST_CHAT_INPUT, + } + ] + + # Use a context manager for the streaming response + with client.chat.completions.create(model=TEST_CHAT_MODEL, messages=messages, stream=True) as chat_completion: + chunks = [chunk.choices[0].delta.content or "" for chunk in chat_completion if chunk.choices] + assert "".join(chunks) == "South Atlantic Ocean." + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {TEST_CHAT_MODEL}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + address, port = address_and_port(client) + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: "chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL", + GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + SERVER_ADDRESS: address, + SERVER_PORT: port, + GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 2 + log_records = logrecords_from_logs(logs) + user_message, choice = log_records + assert dict(user_message.attributes) == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert dict(user_message.body) == {} + + assert_stop_log_record(choice) + + (operation_duration_metric,) = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, + GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL, + } + assert_operation_duration_metric( + client, "chat", operation_duration_metric, attributes=attributes, min_data_point=0.006761051714420319 + ) + + @pytest.mark.skipif(OPENAI_VERSION < (1, 8, 0), reason="LegacyAPIResponse available") @pytest.mark.vcr() def test_chat_stream_with_raw_response(default_openai_env, trace_exporter, metrics_reader, logs_exporter): @@ -2096,6 +2152,67 @@ async def test_chat_async_stream(default_openai_env, trace_exporter, metrics_rea ) +@pytest.mark.vcr() +@pytest.mark.asyncio +async def test_chat_async_stream_with_context_manager( + default_openai_env, trace_exporter, metrics_reader, logs_exporter +): + client = openai.AsyncOpenAI() + + messages = [ + { + "role": "user", + "content": TEST_CHAT_INPUT, + } + ] + + # Use a context manager for the asynchronous streaming response + async with await client.chat.completions.create( + model=TEST_CHAT_MODEL, messages=messages, stream=True + ) as chat_completion: + chunks = [chunk.choices[0].delta.content or "" async for chunk in chat_completion if chunk.choices] + assert "".join(chunks) == "South Atlantic Ocean." + + spans = trace_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.name == f"chat {TEST_CHAT_MODEL}" + assert span.kind == SpanKind.CLIENT + assert span.status.status_code == StatusCode.UNSET + + address, port = address_and_port(client) + assert dict(span.attributes) == { + GEN_AI_OPERATION_NAME: "chat", + GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, + GEN_AI_SYSTEM: "openai", + GEN_AI_RESPONSE_ID: "chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL", + GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL, + GEN_AI_RESPONSE_FINISH_REASONS: ("stop",), + SERVER_ADDRESS: address, + SERVER_PORT: port, + GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default", + } + + logs = logs_exporter.get_finished_logs() + assert len(logs) == 2 + log_records = logrecords_from_logs(logs) + user_message, choice = log_records + assert dict(user_message.attributes) == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"} + assert dict(user_message.body) == {} + + assert_stop_log_record(choice) + + (operation_duration_metric,) = get_sorted_metrics(metrics_reader) + attributes = { + GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL, + GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL, + } + assert_operation_duration_metric( + client, "chat", operation_duration_metric, attributes=attributes, min_data_point=0.006761051714420319 + ) + + @pytest.mark.skipif(OPENAI_VERSION < (1, 8, 0), reason="LegacyAPIResponse available") @pytest.mark.vcr() @pytest.mark.asyncio From 84a3f24d56c3d70469db7a011a9381ad9fb3d90c Mon Sep 17 00:00:00 2001 From: Srdjan Lulic Date: Wed, 23 Apr 2025 12:06:32 +0100 Subject: [PATCH 5/5] openai: update CHANGELOG and bump version to 1.1.0 (#81) --- .../CHANGELOG.md | 5 +++++ .../src/opentelemetry/instrumentation/openai/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/CHANGELOG.md b/instrumentation/elastic-opentelemetry-instrumentation-openai/CHANGELOG.md index 44d855a..aad4401 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/CHANGELOG.md +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/CHANGELOG.md @@ -1,5 +1,10 @@ # Elastic OpenTelemetry Instrumentation OpenAI +## v1.1.0 + +- Fix missing or double spans when completion stream is used with context manager (#80) +- Follow semantic conventions 1.31.0: use GEN_AI_OUTPUT_TYPE, GEN_AI_REQUEST_SEED and GEN_AI_REQUEST_CHOICE_COUNT attributes (#77, #76, #75) + ## v1.0.0 - Fix instrumentation of with_raw_response (#73) diff --git a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/version.py b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/version.py index 252d89a..669e42a 100644 --- a/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/version.py +++ b/instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/version.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.0.0" +__version__ = "1.1.0"