Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 74 additions & 3 deletions litellm/integrations/langfuse/langfuse_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,12 @@ def set_langfuse_otel_attributes(span: Span, kwargs, response_obj):
_utils.set_attributes(span, kwargs, response_obj)

#########################################################
# Set Langfuse specific attributes eg Langfuse Environment
# Set Langfuse specific attributes
#########################################################
LangfuseOtelLogger._set_langfuse_specific_attributes(
span=span,
kwargs=kwargs
kwargs=kwargs,
response_obj=response_obj
)
return

Expand Down Expand Up @@ -86,7 +87,7 @@ def _extract_langfuse_metadata(kwargs: dict) -> dict:
return metadata

@staticmethod
def _set_langfuse_specific_attributes(span: Span, kwargs):
def _set_langfuse_specific_attributes(span: Span, kwargs, response_obj):
"""
Sets Langfuse specific metadata attributes onto the OTEL span.

Expand All @@ -96,6 +97,7 @@ def _set_langfuse_specific_attributes(span: Span, kwargs):
compatibility.
"""
from litellm.integrations.arize._utils import safe_set_attribute
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps

# 1) Environment variable override
langfuse_environment = os.environ.get("LANGFUSE_TRACING_ENVIRONMENT")
Expand Down Expand Up @@ -141,6 +143,75 @@ def _set_langfuse_specific_attributes(span: Span, kwargs):
value = str(value)
safe_set_attribute(span, enum_attr.value, value)

# 3) Set observation input/output for better UI display
#
# These Langfuse-specific attributes provide better UI display,
# especially for tool calls and function calling.
# Set observation input (messages)
messages = kwargs.get("messages")
if messages:
safe_set_attribute(
span,
LangfuseSpanAttributes.OBSERVATION_INPUT.value,
safe_dumps(messages),
)

# Set observation output (response with tool_calls if present)
if response_obj and hasattr(response_obj, "get"):
choices = response_obj.get("choices", [])
if choices:
# Extract the first choice's message
first_choice = choices[0]
message = first_choice.get("message", {})

# Check if there are tool_calls
tool_calls = message.get("tool_calls")
if tool_calls:
# Transform tool_calls to Langfuse-expected format
transformed_tool_calls = []
for tool_call in tool_calls:
function = tool_call.get("function", {})
arguments_str = function.get("arguments", "{}")

# Parse arguments from JSON string to object
try:
arguments_obj = json.loads(arguments_str) if isinstance(arguments_str, str) else arguments_str
except json.JSONDecodeError:
arguments_obj = {}

# Create Langfuse-compatible tool call object
langfuse_tool_call = {
"id": response_obj.get("id", ""),
"name": function.get("name", ""),
"call_id": tool_call.get("id", ""),
"type": "function_call",
"arguments": arguments_obj,
}
transformed_tool_calls.append(langfuse_tool_call)

# Set the observation output with transformed tool_calls
safe_set_attribute(
span,
LangfuseSpanAttributes.OBSERVATION_OUTPUT.value,
safe_dumps(transformed_tool_calls),
)
else:
# No tool_calls, use regular content-based output
output_data = {}

if message.get("role"):
output_data["role"] = message.get("role")

if message.get("content") is not None:
output_data["content"] = message.get("content")

if output_data:
safe_set_attribute(
span,
LangfuseSpanAttributes.OBSERVATION_OUTPUT.value,
safe_dumps(output_data),
)

@staticmethod
def _get_langfuse_otel_host() -> Optional[str]:
"""
Expand Down
4 changes: 4 additions & 0 deletions litellm/types/integrations/langfuse_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class LangfuseSpanAttributes(str, Enum):
MASK_INPUT = "langfuse.generation.mask_input"
MASK_OUTPUT = "langfuse.generation.mask_output"

# ---- Observation input/output ----
OBSERVATION_INPUT = "langfuse.observation.input"
OBSERVATION_OUTPUT = "langfuse.observation.output"

# ---- Trace-level metadata ----
TRACE_USER_ID = "user.id"
SESSION_ID = "session.id"
Expand Down
113 changes: 107 additions & 6 deletions tests/test_litellm/integrations/test_langfuse_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_set_langfuse_environment_attribute(self):

with patch.dict(os.environ, {'LANGFUSE_TRACING_ENVIRONMENT': test_env}):
with patch('litellm.integrations.arize._utils.safe_set_attribute') as mock_safe_set_attribute:
LangfuseOtelLogger._set_langfuse_specific_attributes(mock_span, mock_kwargs)
LangfuseOtelLogger._set_langfuse_specific_attributes(mock_span, mock_kwargs, {})

# safe_set_attribute(span, key, value) → positional args
mock_safe_set_attribute.assert_called_once_with(
Expand Down Expand Up @@ -130,7 +130,7 @@ def add_metadata_from_header(litellm_params, metadata):
assert extracted.get("foo") == "bar"
assert extracted.get("enriched") is True

def test_set_langfuse_specific_attributes_full_mapping(self):
def test_set_langfuse_specific_attributes_metadata(self):
"""Verify every supported metadata key maps to the correct OTEL attribute and complex types are JSON-serialised."""
# Build a sample metadata payload covering all mappings
metadata = {
Expand All @@ -156,7 +156,7 @@ def test_set_langfuse_specific_attributes_full_mapping(self):

# Capture calls to safe_set_attribute
with patch('litellm.integrations.arize._utils.safe_set_attribute') as mock_safe_set_attribute:
LangfuseOtelLogger._set_langfuse_specific_attributes(MagicMock(), kwargs)
LangfuseOtelLogger._set_langfuse_specific_attributes(MagicMock(), kwargs, None)

# Build expected calls manually for clarity
from litellm.types.integrations.langfuse_otel import LangfuseSpanAttributes
Expand Down Expand Up @@ -189,6 +189,109 @@ def test_set_langfuse_specific_attributes_full_mapping(self):

assert actual == expected, "Mismatch between expected and actual OTEL attribute mapping."

def test_set_langfuse_specific_attributes_with_content(self):
    """Check observation input/output attributes for a plain text completion.

    A single-choice response without tool calls is passed together with the
    request messages. ``safe_set_attribute`` is mocked, every recorded value
    is decoded from JSON, and the resulting ``{attribute: value}`` mapping
    must contain exactly the observation input (the request messages) and
    the observation output (the assistant's role and content).
    """
    from litellm.types.integrations.langfuse_otel import LangfuseSpanAttributes
    from litellm.types.utils import Choices, ModelResponse

    # Request side: the conversation that should surface as observation input.
    request_kwargs = {
        "messages": [{"role": "user", "content": "What's the weather in Tokyo?"}],
    }

    # Response side: one assistant message with text content and no tool calls.
    assistant_choice = Choices(
        finish_reason="stop",
        message={
            "role": "assistant",
            "content": "The weather in Tokyo is sunny.",
        },
    )
    response = ModelResponse(
        id="chatcmpl-test",
        model="gpt-4o",
        choices=[assistant_choice],
    )

    # NOTE(review): every recorded value is JSON-decoded below, so this test
    # presumably relies on no non-JSON attribute (e.g. an environment name
    # from LANGFUSE_TRACING_ENVIRONMENT) being set - confirm in CI.
    with patch("litellm.integrations.arize._utils.safe_set_attribute") as set_attr_mock:
        LangfuseOtelLogger._set_langfuse_specific_attributes(
            MagicMock(), request_kwargs, response
        )

    # safe_set_attribute(span, key, value): collect key -> decoded value.
    recorded = {}
    for recorded_call in set_attr_mock.call_args_list:
        recorded[recorded_call.args[1]] = json.loads(recorded_call.args[2])

    wanted = {
        LangfuseSpanAttributes.OBSERVATION_INPUT.value: [
            {"role": "user", "content": "What's the weather in Tokyo?"},
        ],
        LangfuseSpanAttributes.OBSERVATION_OUTPUT.value: {
            "role": "assistant",
            "content": "The weather in Tokyo is sunny.",
        },
    }

    assert recorded == wanted, "Mismatch in observation input/output OTEL attributes."


def test_set_langfuse_specific_attributes_with_tool_calls(self):
    """Check that tool calls are reshaped into the Langfuse observation output format.

    The response carries one function tool call whose arguments are a JSON
    string. With empty kwargs (no request messages), the only attribute
    expected is the observation output: a list with one entry holding the
    response id, the function name, the parsed arguments object, the tool
    call id as ``call_id`` and the type ``"function_call"``.
    """
    from litellm.types.integrations.langfuse_otel import LangfuseSpanAttributes
    from litellm.types.utils import (
        ChatCompletionMessageToolCall,
        Choices,
        Function,
        ModelResponse,
    )

    # One function tool call; arguments are deliberately a JSON *string*.
    weather_call = ChatCompletionMessageToolCall(
        function=Function(
            arguments='{"location":"Tokyo"}',
            name="get_weather",
        ),
        id="call_123",
        type="function",
    )
    tool_choice = Choices(
        finish_reason="tool_calls",
        message={
            "role": "assistant",
            "content": None,
            "tool_calls": [weather_call],
        },
    )
    response = ModelResponse(
        id="chatcmpl-test",
        model="gpt-4o",
        choices=[tool_choice],
    )

    with patch("litellm.integrations.arize._utils.safe_set_attribute") as set_attr_mock:
        LangfuseOtelLogger._set_langfuse_specific_attributes(MagicMock(), {}, response)

    # safe_set_attribute(span, key, value): collect key -> decoded value.
    recorded = {}
    for recorded_call in set_attr_mock.call_args_list:
        recorded[recorded_call.args[1]] = json.loads(recorded_call.args[2])

    wanted = {
        LangfuseSpanAttributes.OBSERVATION_OUTPUT.value: [
            {
                "id": "chatcmpl-test",
                "name": "get_weather",
                "arguments": {"location": "Tokyo"},
                "call_id": "call_123",
                "type": "function_call",
            },
        ],
    }

    assert recorded == wanted, "Mismatch in observation output OTEL attribute for tool calls."


def test_construct_dynamic_otel_headers_with_langfuse_keys(self):
"""Test that construct_dynamic_otel_headers creates proper auth headers when langfuse keys are provided."""
from litellm.types.utils import StandardCallbackDynamicParams
Expand Down Expand Up @@ -352,7 +455,7 @@ def test_responses_api_langfuse_specific_attributes(self):
mock_span = MagicMock()

with patch('litellm.integrations.arize._utils.safe_set_attribute') as mock_safe_set_attribute:
LangfuseOtelLogger._set_langfuse_specific_attributes(mock_span, kwargs)
LangfuseOtelLogger._set_langfuse_specific_attributes(mock_span, kwargs, {})

# Verify specific attributes were set
from litellm.types.integrations.langfuse_otel import LangfuseSpanAttributes
Expand All @@ -371,8 +474,6 @@ def test_responses_api_langfuse_specific_attributes(self):
for expected_call in expected_calls:
mock_safe_set_attribute.assert_any_call(*expected_call)




# Allow running this test module directly as a script: hands this file's
# path to pytest so only the tests defined here are collected.
if __name__ == "__main__":
    pytest.main([__file__])
Loading