poc auto-disable openai instrumentation in LiteLLMCallback (#177)

dinmukhamedm · web-flow · commit 3f58339b451b · 2025-08-20T11:24:49.000+01:00
* poc auto-disable openai instrumentation in LiteLLMCallback

* remove kwarg, fix tests

* fix typehint in conftest
diff --git a/src/lmnr/opentelemetry_lib/litellm/__init__.py b/src/lmnr/opentelemetry_lib/litellm/__init__.py
@@ -11,6 +11,10 @@
 from lmnr.opentelemetry_lib.utils.package_check import is_package_installed
 from lmnr.sdk.log import get_default_logger
 
+from lmnr.opentelemetry_lib.opentelemetry.instrumentation.openai import (
+    OpenAIInstrumentor,
+)
+
 logger = get_default_logger(__name__)
 
 SUPPORTED_CALL_TYPES = ["completion", "acompletion"]
@@ -41,6 +45,17 @@ def __init__(self, **kwargs):
             if not hasattr(TracerWrapper, "instance") or TracerWrapper.instance is None:
                 raise ValueError("Laminar must be initialized before LiteLLM callback")
 
+            if is_package_installed("openai"):
+                openai_instrumentor = OpenAIInstrumentor()
+                if (
+                    openai_instrumentor
+                    and openai_instrumentor.is_instrumented_by_opentelemetry
+                ):
+                    logger.info(
+                        "Disabling OpenTelemetry instrumentation for OpenAI to avoid double-instrumentation of LiteLLM."
+                    )
+                    openai_instrumentor.uninstrument()
+
         def _get_tracer(self) -> Tracer:
             if not hasattr(TracerWrapper, "instance") or TracerWrapper.instance is None:
                 raise ValueError("Laminar must be initialized before LiteLLM callback")
diff --git a/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py b/src/lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py
@@ -2,6 +2,8 @@
 
 from opentelemetry._events import get_event_logger
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+
+from lmnr.sdk.log import get_default_logger
 from ..shared.chat_wrappers import (
     achat_wrapper,
     chat_wrapper,
@@ -44,6 +46,7 @@
 
 
 _instruments = ("openai >= 1.0.0",)
+logger = get_default_logger(__name__)
 
 
 class OpenAIV1Instrumentor(BaseInstrumentor):
@@ -63,7 +66,8 @@ def _try_wrap(self, module, function, wrapper):
         """
         try:
             wrap_function_wrapper(module, function, wrapper)
-        except (AttributeError, ModuleNotFoundError):
+        except (AttributeError, ModuleNotFoundError, ImportError):
+            logger.debug(f"Failed to wrap {module}.{function}")
             pass
 
     def _instrument(self, **kwargs):
@@ -331,28 +335,34 @@ def _instrument(self, **kwargs):
         )
 
     def _uninstrument(self, **kwargs):
-        unwrap("openai.resources.chat.completions", "Completions.create")
-        unwrap("openai.resources.completions", "Completions.create")
-        unwrap("openai.resources.embeddings", "Embeddings.create")
-        unwrap("openai.resources.chat.completions", "AsyncCompletions.create")
-        unwrap("openai.resources.completions", "AsyncCompletions.create")
-        unwrap("openai.resources.embeddings", "AsyncEmbeddings.create")
-        unwrap("openai.resources.images", "Images.generate")
+        self.try_unwrap("openai.resources.chat.completions.Completions", "create")
+        self.try_unwrap("openai.resources.completions.Completions", "create")
+        self.try_unwrap("openai.resources.embeddings.Embeddings", "create")
+        self.try_unwrap("openai.resources.chat.completions.AsyncCompletions", "create")
+        self.try_unwrap("openai.resources.completions.AsyncCompletions", "create")
+        self.try_unwrap("openai.resources.embeddings.AsyncEmbeddings", "create")
+        self.try_unwrap("openai.resources.images.Images", "generate")
+        self.try_unwrap("openai.resources.chat.completions.Completions", "parse")
+        self.try_unwrap("openai.resources.chat.completions.AsyncCompletions", "parse")
+        self.try_unwrap("openai.resources.beta.assistants.Assistants", "create")
+        self.try_unwrap("openai.resources.beta.chat.completions.Completions", "parse")
+        self.try_unwrap(
+            "openai.resources.beta.chat.completions.AsyncCompletions", "parse"
+        )
+        self.try_unwrap("openai.resources.beta.threads.runs.Runs", "create")
+        self.try_unwrap("openai.resources.beta.threads.runs.Runs", "retrieve")
+        self.try_unwrap("openai.resources.beta.threads.runs.Runs", "create_and_stream")
+        self.try_unwrap("openai.resources.beta.threads.messages.Messages", "list")
+        self.try_unwrap("openai.resources.responses.Responses", "create")
+        self.try_unwrap("openai.resources.responses.Responses", "retrieve")
+        self.try_unwrap("openai.resources.responses.Responses", "cancel")
+        self.try_unwrap("openai.resources.responses.AsyncResponses", "create")
+        self.try_unwrap("openai.resources.responses.AsyncResponses", "retrieve")
+        self.try_unwrap("openai.resources.responses.AsyncResponses", "cancel")
 
-        # Beta APIs may not be available consistently in all versions
+    def try_unwrap(self, module, function):
         try:
-            unwrap("openai.resources.beta.assistants", "Assistants.create")
-            unwrap("openai.resources.beta.chat.completions", "Completions.parse")
-            unwrap("openai.resources.beta.chat.completions", "AsyncCompletions.parse")
-            unwrap("openai.resources.beta.threads.runs", "Runs.create")
-            unwrap("openai.resources.beta.threads.runs", "Runs.retrieve")
-            unwrap("openai.resources.beta.threads.runs", "Runs.create_and_stream")
-            unwrap("openai.resources.beta.threads.messages", "Messages.list")
-            unwrap("openai.resources.responses", "Responses.create")
-            unwrap("openai.resources.responses", "Responses.retrieve")
-            unwrap("openai.resources.responses", "Responses.cancel")
-            unwrap("openai.resources.responses", "AsyncResponses.create")
-            unwrap("openai.resources.responses", "AsyncResponses.retrieve")
-            unwrap("openai.resources.responses", "AsyncResponses.cancel")
-        except ImportError:
+            unwrap(module, function)
+        except (AttributeError, ModuleNotFoundError, ImportError):
+            logger.debug(f"Failed to unwrap {module}.{function}")
             pass
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,3 +1,4 @@
+from typing import Generator
 import pytest
 from unittest.mock import patch
 from lmnr import Laminar
@@ -37,9 +38,30 @@ def mock_tracermanager_init(*args, **kwargs):
     return exporter
 
 
-@pytest.fixture(scope="session")
-def litellm_callback() -> LaminarLiteLLMCallback:
-    return LaminarLiteLLMCallback()
+@pytest.fixture(scope="function")
+def litellm_callback() -> Generator[LaminarLiteLLMCallback, None, None]:
+    from lmnr.opentelemetry_lib.opentelemetry.instrumentation.openai import (
+        OpenAIInstrumentor,
+    )
+
+    # Record whether OpenAI was instrumented before the callback is created, so teardown can restore it
+    instrumentor = OpenAIInstrumentor()
+    was_instrumented = instrumentor.is_instrumented_by_opentelemetry
+
+    # Create the callback (its __init__ uninstruments OpenAI if it was instrumented)
+    callback = LaminarLiteLLMCallback()
+
+    yield callback
+
+    # Teardown: re-instrument OpenAI only if it was instrumented before and no longer is
+    if was_instrumented and not instrumentor.is_instrumented_by_opentelemetry:
+        # Only the tracer provider is passed; NOTE(review): confirm no other init-time settings need restoring
+        from lmnr.opentelemetry_lib.tracing import TracerWrapper
+
+        if hasattr(TracerWrapper, "instance") and TracerWrapper.instance is not None:
+            instrumentor.instrument(
+                tracer_provider=TracerWrapper.instance._tracer_provider
+            )
 
 
 @pytest.fixture(scope="function", autouse=True)
diff --git a/tests/test_instrumentations/test_openai/conftest.py b/tests/test_instrumentations/test_openai/conftest.py
@@ -115,7 +115,7 @@ def fixture_meter_provider(reader):
     return meter_provider
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="function")
 def instrument_legacy(reader, tracer_provider, meter_provider):
     async def upload_base64_image(*args):
         return "/some/url"
@@ -125,14 +125,18 @@ async def upload_base64_image(*args):
         enrich_token_usage=True,
         upload_base64_image=upload_base64_image,
     )
-    instrumentor.instrument(
-        tracer_provider=tracer_provider,
-        meter_provider=meter_provider,
-    )
+    was_already_instrumented = instrumentor.is_instrumented_by_opentelemetry
+    if not was_already_instrumented:
+        instrumentor.instrument(
+            tracer_provider=tracer_provider,
+            meter_provider=meter_provider,
+        )
 
     yield instrumentor
 
-    instrumentor.uninstrument()
+    # Only uninstrument if we instrumented it ourselves
+    if not was_already_instrumented and instrumentor.is_instrumented_by_opentelemetry:
+        instrumentor.uninstrument()
 
 
 @pytest.fixture(scope="function")
@@ -152,7 +156,6 @@ def instrument_with_content(
     Config.use_legacy_attributes = True
     Config.event_logger = None
     os.environ.pop(LMNR_TRACE_CONTENT, None)
-    instrumentor.uninstrument()
 
 
 @pytest.fixture(scope="function")
@@ -172,7 +175,6 @@ def instrument_with_no_content(
     Config.use_legacy_attributes = True
     Config.event_logger = None
     os.environ.pop(LMNR_TRACE_CONTENT, None)
-    instrumentor.uninstrument()
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/test_instrumentations/test_openai/traces/test_chat.py b/tests/test_instrumentations/test_openai/traces/test_chat.py
@@ -945,7 +945,6 @@ def test_with_asyncio_run(
 
 
 @pytest.mark.vcr
-@pytest.mark.asyncio
 def test_with_asyncio_run_with_events_with_content(
     instrument_with_content, span_exporter, log_exporter, async_openai_client
 ):
@@ -994,7 +993,6 @@ def test_with_asyncio_run_with_events_with_content(
 
 
 @pytest.mark.vcr
-@pytest.mark.asyncio
 def test_with_asyncio_run_with_events_with_no_content(
     instrument_with_no_content, span_exporter, log_exporter, async_openai_client
 ):
@@ -1301,7 +1299,7 @@ def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict
 
 
 @pytest.mark.vcr
-def test_chat_history_message_dict(span_exporter, openai_client):
+def test_chat_history_message_dict(instrument_legacy, span_exporter, openai_client):
     first_user_message = {
         "role": "user",
         "content": "Generate a random noun in Korean. Respond with just that word.",
@@ -1371,7 +1369,7 @@ def test_chat_history_message_dict(span_exporter, openai_client):
 
 
 @pytest.mark.vcr
-def test_chat_history_message_pydantic(span_exporter, openai_client):
+def test_chat_history_message_pydantic(instrument_legacy, span_exporter, openai_client):
     first_user_message = {
         "role": "user",
         "content": "Generate a random noun in Korean. Respond with just that word.",
diff --git a/tests/test_litellm_openai.py b/tests/test_litellm_openai.py
@@ -34,8 +34,8 @@ def test_litellm_openai_basic(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -79,8 +79,8 @@ def test_litellm_openai_text_block(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -124,8 +124,8 @@ def test_litellm_openai_with_streaming(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.usage.input_tokens"] == 14
     assert span.attributes["gen_ai.usage.output_tokens"] == 7
@@ -178,12 +178,9 @@ def test_litellm_openai_with_chat_history(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 4
-    inner_spans = [s for s in spans if s.name == "litellm.completion"]
-    assert len(inner_spans) == 2
-    inner_spans = sorted(inner_spans, key=lambda s: s.start_time)
-    first_span = sorted(inner_spans, key=lambda s: s.start_time)[0]
-    second_span = sorted(inner_spans, key=lambda s: s.start_time)[1]
+    assert len(spans) == 2
+    first_span = sorted(spans, key=lambda s: s.start_time)[0]
+    second_span = sorted(spans, key=lambda s: s.start_time)[1]
     assert first_span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert second_span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
 
@@ -258,8 +255,8 @@ def test_litellm_openai_with_image_base64(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -313,8 +310,8 @@ def test_litellm_openai_with_image_url(
     time.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -374,8 +371,8 @@ async def test_async_litellm_openai_with_image_base64(
     await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -430,8 +427,8 @@ async def test_async_litellm_openai_with_image_url(
     await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -472,8 +469,8 @@ async def test_async_litellm_openai_basic(
     await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -518,8 +515,8 @@ async def test_async_litellm_openai_text_block(
     await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.response.model"] == "gpt-4.1-nano-2025-04-14"
     assert span.attributes["gen_ai.response.id"] == response.id
@@ -564,8 +561,8 @@ async def test_async_litellm_openai_with_streaming(
     await asyncio.sleep(SLEEP_TO_FLUSH_SECONDS)
 
     spans = span_exporter.get_finished_spans()
-    assert len(spans) == 2
-    span = [s for s in spans if s.name == "litellm.completion"][0]
+    assert len(spans) == 1
+    span = spans[0]
     assert span.attributes["gen_ai.request.model"] == "gpt-4.1-nano"
     assert span.attributes["gen_ai.usage.input_tokens"] == 14
     assert span.attributes["gen_ai.usage.output_tokens"] == 7
diff --git a/tests/test_observe.py b/tests/test_observe.py
@@ -5,7 +5,6 @@
 from lmnr import Laminar, observe
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from opentelemetry import trace
-from opentelemetry.trace import INVALID_SPAN_ID
 
 
 def test_observe(span_exporter: InMemorySpanExporter):