diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/CHANGELOG.md b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/CHANGELOG.md index 4ae7b7b0..ad513677 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/CHANGELOG.md +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/CHANGELOG.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Fixed +- **LLM Content Capture**: Fixed a critical bug where LLM layer input/output messages were not being captured in spans + - Removed duplicate message conversion in `convert_agentscope_messages_to_genai_format` that was causing content loss + - Dict messages are already in parts format from `extract_llm_attributes`, so there is no need to re-convert them with `get_message_converter` + - Simplified message conversion logic to avoid redundant transformations + - Enhanced test coverage with comprehensive LLM I/O content verification + ### Breaking Changes - **Minimum AgentScope version requirement**: Only supports AgentScope 1.0.0 and above. Previous 0.x versions are not supported. 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py index 10782e11..3225de68 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/src/opentelemetry/instrumentation/agentscope/utils.py @@ -432,14 +432,7 @@ def convert_agentscope_messages_to_genai_format( if isinstance(msg, Msg): msg_dict = _format_msg_to_parts(msg) elif isinstance(msg, dict): - if provider_name: - try: - converted = get_message_converter(provider_name)([msg]) - msg_dict = converted[0] if converted else msg - except Exception: - msg_dict = msg - else: - msg_dict = msg + msg_dict = msg else: continue diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_span_content.py b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_span_content.py index c6e201e4..063407a1 100644 --- a/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_span_content.py +++ b/instrumentation-loongsuite/loongsuite-instrumentation-agentscope/tests/test_span_content.py @@ -2,6 +2,7 @@ """Test Span Content Capture - Verify input/output capture""" import asyncio +import json import pytest from agentscope.agent import ReActAgent @@ -27,7 +28,7 @@ def test_span_content_with_span_only( instrument_with_content, request, ): - """Test if input/output is captured in SPAN_ONLY mode""" + """Test if input/output is captured in SPAN_ONLY mode - both Agent and LLM layers""" # agentscope.init already called in fixture # agentscope.init(project="test_span_content") toolkit = Toolkit() @@ -53,68 +54,136 @@ async def run(): # Get spans spans = span_exporter.get_finished_spans() - print(f"\n=== Found {len(spans)} spans ===") 
+ print_span_tree(spans) - # Find chat span - chat_spans = [ + agent_spans = [ s for s in spans if s.attributes and s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) - == "chat" + == "invoke_agent" ] - assert len(chat_spans) > 0, "Expected at least one chat span" - - # Check attributes of first chat span - chat_span = chat_spans[0] - attrs = dict(chat_span.attributes) - - print("\n=== Chat Span Attributes ===") - for key, value in sorted(attrs.items()): - if ( - "message" in key.lower() - or "input" in key.lower() - or "output" in key.lower() - ): - print( - f"{key}: {value[:100] if isinstance(value, str) and len(value) > 100 else value}" - ) - - # Verify if input/output is captured - has_input = any( - "input" in k.lower() or "prompt" in k.lower() for k in attrs.keys() - ) - has_output = any( - "output" in k.lower() - or "completion" in k.lower() - or "response" in k.lower() - for k in attrs.keys() + assert ( + len(agent_spans) > 0 + ), "Expected at least one invoke_agent span" + + agent_span = agent_spans[0] + agent_attrs = dict(agent_span.attributes) + + print(f"Agent span name: {agent_span.name}") + print(f"Agent name: {agent_attrs.get('gen_ai.agent.name')}") + + # Verify Agent input content + assert ( + GenAIAttributes.GEN_AI_INPUT_MESSAGES in agent_attrs + ), "Agent span missing GEN_AI_INPUT_MESSAGES" + + agent_input = agent_attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + print(f"\nAgent input content captured (first 200 chars):") + print(f" {agent_input[:200] if isinstance(agent_input, str) else agent_input}") + + # Verify input content contains user message + if isinstance(agent_input, str): + try: + input_data = json.loads(agent_input) + assert "Hello" in str( + input_data + ), "Agent input should contain 'Hello'" + except json.JSONDecodeError: + assert "Hello" in agent_input, "Agent input should contain 'Hello'" + + # Verify Agent output content + assert ( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in agent_attrs + ), "Agent span missing 
GEN_AI_OUTPUT_MESSAGES" + + agent_output = agent_attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] + print(f"\nAgent output content captured (first 200 chars):") + print( + f" {agent_output[:200] if isinstance(agent_output, str) else agent_output}" ) - print(f"\nHas input-related attributes: {has_input}") - print(f"Has output-related attributes: {has_output}") + # ==================== Verify LLM Layer ==================== - # Print all attribute keys - print(f"\nAll attribute keys: {sorted(attrs.keys())}") + chat_spans = [ + s + for s in spans + if s.attributes + and s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == "chat" + ] - # Verify basic GenAI attributes - assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs - assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs + assert len(chat_spans) > 0, "Expected at least one chat span" - # Check if there are input messages or output messages - if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs: - print("\n✓ Found GEN_AI_INPUT_MESSAGES") - print( - f" Value: {attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES][:200]}..." 
- ) + chat_span = chat_spans[0] + chat_attrs = dict(chat_span.attributes) + + print(f"Chat span name: {chat_span.name}") + print(f"Model: {chat_attrs.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)}") + + # Verify LLM input content + assert ( + GenAIAttributes.GEN_AI_INPUT_MESSAGES in chat_attrs + ), "LLM chat span missing GEN_AI_INPUT_MESSAGES - this is a critical issue" + + llm_input = chat_attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + print(f"\nLLM input content captured (first 200 chars):") + print(f" {llm_input[:200] if isinstance(llm_input, str) else llm_input}") + + # Verify input content structure and content + if isinstance(llm_input, str): + try: + input_msgs = json.loads(llm_input) + assert isinstance(input_msgs, list), "LLM input should be a list" + assert len(input_msgs) > 0, "LLM input should not be empty" + + # Check if there's a user message (content is in parts array) + has_user_msg = False + for msg in input_msgs: + if msg.get("role") == "user": + parts = msg.get("parts", []) + for part in parts: + if part.get("type") == "text" and "Hello" in part.get("content", ""): + has_user_msg = True + break + + assert has_user_msg, "LLM input should contain user message with 'Hello'" + print(" LLM input contains user message") + except json.JSONDecodeError as e: + pytest.fail(f"LLM input is not valid JSON: {e}") + + # Verify LLM output content + assert ( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in chat_attrs + ), "LLM chat span missing GEN_AI_OUTPUT_MESSAGES - this is a critical issue" + + llm_output = chat_attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] + + + # Verify output content structure + if isinstance(llm_output, str): + try: + output_msgs = json.loads(llm_output) + assert isinstance(output_msgs, list), "LLM output should be a list" + assert len(output_msgs) > 0, "LLM output should not be empty" + print(" LLM output structure is correct") + except json.JSONDecodeError as e: + pytest.fail(f"LLM output is not valid JSON: {e}") + + # ==================== 
Verify Span Hierarchy ==================== + + # Verify that chat span is part of the same trace + if agent_span and chat_span: + assert ( + chat_span.parent + ), "Chat span should have a parent" + assert ( + chat_span.context.trace_id == agent_span.context.trace_id + ), "Chat span should be in the same trace as agent span" + print("LLM chat span is in the same trace as agent invoke_agent span") - if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs: - print("\n✓ Found GEN_AI_OUTPUT_MESSAGES") - print( - f" Value: {attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES][:200]}..." - ) @pytest.mark.vcr() def test_span_content_with_span_and_event( @@ -156,7 +225,31 @@ async def run(): logs = log_exporter.get_finished_logs() print(f"\n=== Found {len(logs)} log events ===") - # Find chat span + # ==================== Verify Agent Layer Span ==================== + agent_spans = [ + s + for s in spans + if s.attributes + and s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == "invoke_agent" + ] + + if len(agent_spans) > 0: + agent_span = agent_spans[0] + agent_attrs = dict(agent_span.attributes) + + print("\n=== Agent Span Content ===") + assert ( + GenAIAttributes.GEN_AI_INPUT_MESSAGES in agent_attrs + ), "Agent span missing input content" + assert ( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in agent_attrs + ), "Agent span missing output content" + + print("Agent span has input messages") + print("Agent span has output messages") + + # ==================== Verify LLM Layer Span ==================== chat_spans = [ s for s in spans @@ -165,17 +258,43 @@ async def run(): == "chat" ] - if len(chat_spans) > 0: - chat_span = chat_spans[0] - attrs = dict(chat_span.attributes) - - print("\n=== Checking for content in span ===") - if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs: - print("✓ Span has input messages") - if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs: - print("✓ Span has output messages") + assert len(chat_spans) > 0, "Expected at least one chat span" - # If there 
are log events, check content + chat_span = chat_spans[0] + chat_attrs = dict(chat_span.attributes) + + print("\n=== LLM Chat Span Content ===") + assert ( + GenAIAttributes.GEN_AI_INPUT_MESSAGES in chat_attrs + ), "LLM chat span missing GEN_AI_INPUT_MESSAGES" + assert ( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in chat_attrs + ), "LLM chat span missing GEN_AI_OUTPUT_MESSAGES" + + print("LLM span has input messages") + print("LLM span has output messages") + + # Verify input content + llm_input = chat_attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + if isinstance(llm_input, str): + try: + input_msgs = json.loads(llm_input) + assert len(input_msgs) > 0, "LLM input should not be empty" + print(f" Input messages count: {len(input_msgs)}") + except json.JSONDecodeError: + pass + + # Verify output content + llm_output = chat_attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] + if isinstance(llm_output, str): + try: + output_msgs = json.loads(llm_output) + assert len(output_msgs) > 0, "LLM output should not be empty" + print(f" Output messages count: {len(output_msgs)}") + except json.JSONDecodeError: + pass + + # ==================== Verify Events ==================== if len(logs) > 0: print("\n=== Checking for content in events ===") for i, log in enumerate(logs): @@ -191,7 +310,9 @@ async def run(): for k in attrs.keys() ) if has_content: - print(" ✓ Has content attributes") + print(" Has content attributes") + + print("\nSPAN_AND_EVENT mode test passed") @pytest.mark.vcr() def test_span_content_disabled( @@ -200,7 +321,7 @@ def test_span_content_disabled( instrument_no_content, request, ): - """Test when content capture is disabled""" + """Test when content capture is disabled - Both Agent and LLM layers should not have content""" # agentscope.init already called in fixture # agentscope.init(project="test_no_content") toolkit = Toolkit() @@ -226,6 +347,44 @@ async def run(): # Verify spans spans = span_exporter.get_finished_spans() + print(f"\n=== Found {len(spans)} spans 
===") + + # ==================== Verify Agent Layer ==================== + agent_spans = [ + s + for s in spans + if s.attributes + and s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == "invoke_agent" + ] + + if len(agent_spans) > 0: + agent_span = agent_spans[0] + agent_attrs = dict(agent_span.attributes) + + print("\n=== Agent Span (Content capture disabled) ===") + + # Content should not be captured + has_input_messages = GenAIAttributes.GEN_AI_INPUT_MESSAGES in agent_attrs + has_output_messages = ( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in agent_attrs + ) + + print(f"Has input messages: {has_input_messages}") + print(f"Has output messages: {has_output_messages}") + + assert ( + not has_input_messages + ), "Agent span should NOT have input messages when content capture is disabled" + assert ( + not has_output_messages + ), "Agent span should NOT have output messages when content capture is disabled" + + # But basic attributes should exist + assert GenAIAttributes.GEN_AI_OPERATION_NAME in agent_attrs + print("Agent span correct: no content captured, but has basic attributes") + + # ==================== Verify LLM Layer ==================== chat_spans = [ s for s in spans @@ -236,18 +395,30 @@ async def run(): if len(chat_spans) > 0: chat_span = chat_spans[0] - attrs = dict(chat_span.attributes) + chat_attrs = dict(chat_span.attributes) + + print("\n=== LLM Chat Span (Content capture disabled) ===") - print("\n=== Content capture disabled ===") # Content should not be captured - has_input_messages = GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs + has_input_messages = GenAIAttributes.GEN_AI_INPUT_MESSAGES in chat_attrs has_output_messages = ( - GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in chat_attrs ) print(f"Has input messages: {has_input_messages}") print(f"Has output messages: {has_output_messages}") + assert ( + not has_input_messages + ), "LLM chat span should NOT have input messages when content 
capture is disabled" + assert ( + not has_output_messages + ), "LLM chat span should NOT have output messages when content capture is disabled" + # But basic attributes should exist - assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs - assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs + assert GenAIAttributes.GEN_AI_OPERATION_NAME in chat_attrs + assert GenAIAttributes.GEN_AI_REQUEST_MODEL in chat_attrs + print("LLM span correct: no content captured, but has basic attributes") + + print("\nNO_CONTENT mode test passed: Neither Agent nor LLM layer captured content") +