59 changes: 47 additions & 12 deletions src/sentry/api/endpoints/organization_ai_conversations.py
@@ -1,6 +1,4 @@
import dataclasses
from collections import defaultdict
from datetime import datetime

from rest_framework import serializers
from rest_framework.request import Request
@@ -17,6 +15,7 @@
from sentry.search.eap.types import SearchResolverConfig
from sentry.snuba.referrer import Referrer
from sentry.snuba.spans_rpc import Spans
from sentry.utils import json, logger
Member: this should be imported from python std lib, right?
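A minimal sketch of that import pattern (std-lib logging for the logger, keeping the JSON helpers from sentry.utils); this assumes no module-level logger exists in the file yet:

```python
import logging

from sentry.utils import json

# Module-level logger from the standard library, rather than sentry.utils.
logger = logging.getLogger(__name__)
```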



class OrganizationAIConversationsSerializer(serializers.Serializer):
@@ -110,6 +109,10 @@ def _get_conversations(
_sort: Sort field and direction (currently only supports timestamp sorting, unused for now)
_query: Search query (not yet implemented)
"""
logger.info(
"[ai-conversations] Getting conversations",
extra={"snuba_params": snuba_params.to_dict()},
)
# Step 1: Find conversation IDs with spans in the time range
conversation_ids_results = Spans.run_table_query(
params=snuba_params,
@@ -123,9 +126,11 @@
limit=limit,
referrer=Referrer.API_AI_CONVERSATIONS.value,
config=SearchResolverConfig(auto_fields=True),
sampling_mode="HIGHEST_ACCURACY",
sampling_mode=None,
)
Comment on lines 126 to 130
Contributor:
Bug: First query uses sampling_mode=None instead of HIGHEST_ACCURACY for AI conversation ID discovery.
Severity: CRITICAL | Confidence: 1.00

🔍 Detailed Analysis

The initial query for discovering AI conversation IDs uses sampling_mode=None. This violates explicit repository requirements stating that AI conversation data queries must use sampling_mode="HIGHEST_ACCURACY" to ensure complete and non-extrapolated results. This leads to silently returning incomplete conversation data.

💡 Suggested Fix

Change sampling_mode=None to sampling_mode="HIGHEST_ACCURACY" for the initial conversation ID discovery query to comply with data completeness requirements.
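As a sketch, the relevant tail of the discovery query would then read as below; the arguments collapsed in the diff are left untouched, so this is only illustrative:

```python
conversation_ids_results = Spans.run_table_query(
    params=snuba_params,
    # ... collapsed arguments from the diff above, unchanged ...
    limit=limit,
    referrer=Referrer.API_AI_CONVERSATIONS.value,
    config=SearchResolverConfig(auto_fields=True),
    sampling_mode="HIGHEST_ACCURACY",  # suggested value; the diff currently passes None
)
```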

🤖 Prompt for AI Agent
Fix this bug. In src/sentry/api/endpoints/organization_ai_conversations.py at lines
126-130: The initial query for discovering AI conversation IDs uses
`sampling_mode=None`. This violates explicit repository requirements stating that AI
conversation data queries must use `sampling_mode="HIGHEST_ACCURACY"` to ensure complete
and non-extrapolated results. This leads to silently returning incomplete conversation
data.


Member Author:

While it is true that with sampling_mode=None some results might be sampled out, the "repository requirements" seem to be completely hallucinated.


logger.info("[ai-conversations] Got Conversation IDs results: {conversation_ids_results}")

conversation_ids: list[str] = [
conv_id
for row in conversation_ids_results.get("data", [])
@@ -135,15 +140,19 @@
if not conversation_ids:
return []

# Step 2: Get complete aggregations for these conversations (all time)
all_time_params = dataclasses.replace(
snuba_params,
start=datetime(2020, 1, 1),
end=datetime(2100, 1, 1),
)
# # Step 2: Get complete aggregations for these conversations (all time)
# all_time_params = dataclasses.replace(
# snuba_params,
# start=datetime(2020, 1, 1),
# end=datetime(2100, 1, 1),
# )

logger.info(
"[ai-conversations] Getting complete aggregations for conversations",
extra={"conversation_ids": conversation_ids},
)
results = Spans.run_table_query(
params=all_time_params,
params=snuba_params,
query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]",
selected_columns=[
"gen_ai.conversation.id",
@@ -164,6 +173,11 @@
sampling_mode="HIGHEST_ACCURACY",
)

logger.info(
"[ai-conversations] Got complete aggregations for conversations",
extra={"results": json.dumps(results)},
)

# Create a map of conversation data by ID
conversations_map = {}
for row in results.get("data", []):
@@ -187,6 +201,11 @@
"traceIds": [],
}

logger.info(
"[ai-conversations] Got conversations map",
extra={"conversations_map": json.dumps(conversations_map)},
)

# Preserve the order from step 1
conversations = [
conversations_map[conv_id]
@@ -195,7 +214,7 @@
]

if conversations:
self._enrich_conversations(all_time_params, conversations)
self._enrich_conversations(snuba_params, conversations)
Contributor:
Bug: Conversation Data Incomplete After Time Range Change

The change from all_time_params to snuba_params restricts conversation aggregations and enrichment to the user's query time range. This means metrics (like duration, tokens, cost, errors) and enrichment data (flows, trace IDs) will be incomplete for conversations that span beyond the specified time window, deviating from the original intent to show complete conversation data.
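For reference, the all-time window is the approach commented out earlier in the diff; a sketch of restoring it, using dataclasses.replace on snuba_params as the original code did:

```python
import dataclasses
from datetime import datetime

# Aggregate each discovered conversation over its full lifetime,
# not just the user-selected time range (as in the commented-out Step 2 above).
all_time_params = dataclasses.replace(
    snuba_params,
    start=datetime(2020, 1, 1),
    end=datetime(2100, 1, 1),
)
# ...then pass all_time_params instead of snuba_params to the aggregation
# query and to self._enrich_conversations(...).
```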



return conversations

@@ -206,6 +225,10 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
conversation_ids = [conv["conversationId"] for conv in conversations]

# Query all spans for these conversations to get both agent flows and trace IDs
logger.info(
"[ai-conversations] Enriching conversations",
extra={"conversation_ids": conversation_ids},
)
all_spans_results = Spans.run_table_query(
params=snuba_params,
query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]",
Expand All @@ -224,10 +247,17 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
config=SearchResolverConfig(auto_fields=True),
sampling_mode="HIGHEST_ACCURACY",
)
logger.info(
"[ai-conversations] Got all spans results",
extra={"all_spans_results": json.dumps(all_spans_results)},
)

flows_by_conversation = defaultdict(list)
traces_by_conversation = defaultdict(set)

logger.info(
"[ai-conversations] Collecting traces and flows",
extra={"all_spans_results": json.dumps(all_spans_results)},
)
Contributor:
Bug: Logging Fails with Non-Serializable Data

The json.dumps calls in logging statements (lines 181, 255, 262) operate on raw Snuba query results. These results can include non-JSON-serializable types like Decimal, which causes a TypeError and crashes the API endpoint.
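One way to keep the debug output without the crash is a small helper around the std-lib json module, whose default= hook coerces values like Decimal to strings instead of raising; a sketch:

```python
import json as stdlib_json  # std-lib json, so the default= hook is available


def _safe_dumps(obj):
    """Serialize Snuba rows for logging, coercing Decimal/datetime/etc. via str()."""
    return stdlib_json.dumps(obj, default=str)


# Hypothetical usage in the logging calls above:
# logger.info(
#     "[ai-conversations] Got all spans results",
#     extra={"all_spans_results": _safe_dumps(all_spans_results)},
# )
```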


for row in all_spans_results.get("data", []):
conv_id = row.get("gen_ai.conversation.id", "")
if not conv_id:
@@ -248,3 +278,8 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
conv_id = conversation["conversationId"]
conversation["flow"] = flows_by_conversation.get(conv_id, [])
conversation["traceIds"] = list(traces_by_conversation.get(conv_id, set()))

logger.info(
"[ai-conversations] Enriched conversations",
extra={"conversations": json.dumps(conversations)},
)