From 3d508009540f3c26c616de9f07436e0165daf80f Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:40:13 +0200
Subject: [PATCH 1/6] fix(ai-insights): remove sampling mode from query

---
 .../organization_ai_conversations.py | 59 +++++++++++++++----
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/src/sentry/api/endpoints/organization_ai_conversations.py b/src/sentry/api/endpoints/organization_ai_conversations.py
index b6ecd94e85c04b..33816e2f3b0411 100644
--- a/src/sentry/api/endpoints/organization_ai_conversations.py
+++ b/src/sentry/api/endpoints/organization_ai_conversations.py
@@ -1,6 +1,4 @@
-import dataclasses
 from collections import defaultdict
-from datetime import datetime
 
 from rest_framework import serializers
 from rest_framework.request import Request
@@ -17,6 +15,7 @@
 from sentry.search.eap.types import SearchResolverConfig
 from sentry.snuba.referrer import Referrer
 from sentry.snuba.spans_rpc import Spans
+from sentry.utils import json, logger
 
 
 class OrganizationAIConversationsSerializer(serializers.Serializer):
@@ -110,6 +109,10 @@ def _get_conversations(
             _sort: Sort field and direction (currently only supports timestamp sorting, unused for now)
             _query: Search query (not yet implemented)
         """
+        logger.info(
+            "[ai-conversations] Getting conversations",
+            extra={"snuba_params": snuba_params.to_dict()},
+        )
         # Step 1: Find conversation IDs with spans in the time range
         conversation_ids_results = Spans.run_table_query(
             params=snuba_params,
@@ -123,9 +126,11 @@
             limit=limit,
             referrer=Referrer.API_AI_CONVERSATIONS.value,
             config=SearchResolverConfig(auto_fields=True),
-            sampling_mode="HIGHEST_ACCURACY",
+            sampling_mode=None,
         )
 
+        logger.info("[ai-conversations] Got Conversation IDs results: {conversation_ids_results}")
+
         conversation_ids: list[str] = [
             conv_id
             for row in conversation_ids_results.get("data", [])
@@ -135,15 +140,19 @@
         if not conversation_ids:
             return []
 
-        # Step 2: Get complete aggregations for these conversations (all time)
-        all_time_params = dataclasses.replace(
-            snuba_params,
-            start=datetime(2020, 1, 1),
-            end=datetime(2100, 1, 1),
-        )
+        # # Step 2: Get complete aggregations for these conversations (all time)
+        # all_time_params = dataclasses.replace(
+        #     snuba_params,
+        #     start=datetime(2020, 1, 1),
+        #     end=datetime(2100, 1, 1),
+        # )
 
+        logger.info(
+            "[ai-conversations] Getting complete aggregations for conversations",
+            extra={"conversation_ids": conversation_ids},
+        )
         results = Spans.run_table_query(
-            params=all_time_params,
+            params=snuba_params,
             query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]",
             selected_columns=[
                 "gen_ai.conversation.id",
@@ -164,6 +173,11 @@
             sampling_mode="HIGHEST_ACCURACY",
         )
 
+        logger.info(
+            "[ai-conversations] Got complete aggregations for conversations",
+            extra={"results": json.dumps(results)},
+        )
+
         # Create a map of conversation data by ID
         conversations_map = {}
         for row in results.get("data", []):
@@ -187,6 +201,11 @@
                 "traceIds": [],
             }
 
+        logger.info(
+            "[ai-conversations] Got conversations map",
+            extra={"conversations_map": json.dumps(conversations_map)},
+        )
+
         # Preserve the order from step 1
         conversations = [
             conversations_map[conv_id]
@@ -195,7 +214,7 @@
         ]
 
         if conversations:
-            self._enrich_conversations(all_time_params, conversations)
+            self._enrich_conversations(snuba_params, conversations)
 
         return conversations
 
@@ -206,6 +225,10 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
         conversation_ids = [conv["conversationId"] for conv in conversations]
 
         # Query all spans for these conversations to get both agent flows and trace IDs
+        logger.info(
+            "[ai-conversations] Enriching conversations",
+            extra={"conversation_ids": conversation_ids},
+        )
         all_spans_results = Spans.run_table_query(
             params=snuba_params,
             query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]",
@@ -224,10 +247,17 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
             config=SearchResolverConfig(auto_fields=True),
             sampling_mode="HIGHEST_ACCURACY",
        )
+        logger.info(
+            "[ai-conversations] Got all spans results",
+            extra={"all_spans_results": json.dumps(all_spans_results)},
+        )
 
         flows_by_conversation = defaultdict(list)
         traces_by_conversation = defaultdict(set)
-
+        logger.info(
+            "[ai-conversations] Collecting traces and flows",
+            extra={"all_spans_results": json.dumps(all_spans_results)},
+        )
         for row in all_spans_results.get("data", []):
             conv_id = row.get("gen_ai.conversation.id", "")
             if not conv_id:
@@ -248,3 +278,8 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None
             conv_id = conversation["conversationId"]
             conversation["flow"] = flows_by_conversation.get(conv_id, [])
             conversation["traceIds"] = list(traces_by_conversation.get(conv_id, set()))
+
+        logger.info(
+            "[ai-conversations] Enriched conversations",
+            extra={"conversations": json.dumps(conversations)},
+        )

From 4bcbb241ef7ec386458d06c0dc6141637d63ce24 Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Fri, 24 Oct 2025 13:19:36 +0200
Subject: [PATCH 2/6] fix

---
 src/sentry/api/endpoints/organization_ai_conversations.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/sentry/api/endpoints/organization_ai_conversations.py b/src/sentry/api/endpoints/organization_ai_conversations.py
index 33816e2f3b0411..75edca2d01e43c 100644
--- a/src/sentry/api/endpoints/organization_ai_conversations.py
+++ b/src/sentry/api/endpoints/organization_ai_conversations.py
@@ -1,3 +1,4 @@
+import json  # noqa: S003
 from collections import defaultdict
 
 from rest_framework import serializers
@@ -15,7 +16,7 @@
 from sentry.search.eap.types import SearchResolverConfig
 from sentry.snuba.referrer import Referrer
 from sentry.snuba.spans_rpc import Spans
-from sentry.utils import json, logger
+from sentry.utils import logger
 
 
 class OrganizationAIConversationsSerializer(serializers.Serializer):
@@ -129,7 +130,7 @@ def _get_conversations(
             sampling_mode=None,
         )
 
-        logger.info("[ai-conversations] Got Conversation IDs results: {conversation_ids_results}")
+        logger.info(f"[ai-conversations] Got Conversation IDs results: {conversation_ids_results}")
 
         conversation_ids: list[str] = [
             conv_id

From a6e9daea6806d2b9a3b5459bd53593d46b968290 Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Fri, 24 Oct 2025 13:21:33 +0200
Subject: [PATCH 3/6] logger

---
 .../api/endpoints/organization_ai_conversations.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/sentry/api/endpoints/organization_ai_conversations.py b/src/sentry/api/endpoints/organization_ai_conversations.py
index 75edca2d01e43c..c3ae06ae80e74b 100644
--- a/src/sentry/api/endpoints/organization_ai_conversations.py
+++ b/src/sentry/api/endpoints/organization_ai_conversations.py
@@ -1,4 +1,5 @@
 import json  # noqa: S003
+import logging
 from collections import defaultdict
 
 from rest_framework import serializers
@@ -16,7 +17,8 @@
 from sentry.search.eap.types import SearchResolverConfig
 from sentry.snuba.referrer import Referrer
 from sentry.snuba.spans_rpc import Spans
-from sentry.utils import logger
+
+logger = logging.getLogger("sentry.api.endpoints.organization_ai_conversations")
 
 
 class OrganizationAIConversationsSerializer(serializers.Serializer):
@@ -130,7 +132,10 @@ def _get_conversations(
             sampling_mode=None,
         )
 
-        logger.info(f"[ai-conversations] Got Conversation IDs results: {conversation_ids_results}")
+        logger.info(
+            "[ai-conversations] Got Conversation IDs results",
+            extra={"conversation_ids_results": conversation_ids_results},
+        )
 
         conversation_ids: list[str] = [
             conv_id

From b3ff227be968149cd969d8548f7955691580f579 Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Fri, 24 Oct 2025 13:50:53 +0200
Subject: [PATCH 4/6] fix

---
 src/sentry/api/endpoints/organization_ai_conversations.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/sentry/api/endpoints/organization_ai_conversations.py b/src/sentry/api/endpoints/organization_ai_conversations.py
index c3ae06ae80e74b..77acf84646d476 100644
--- a/src/sentry/api/endpoints/organization_ai_conversations.py
+++ b/src/sentry/api/endpoints/organization_ai_conversations.py
@@ -112,10 +112,7 @@ def _get_conversations(
             _sort: Sort field and direction (currently only supports timestamp sorting, unused for now)
             _query: Search query (not yet implemented)
         """
-        logger.info(
-            "[ai-conversations] Getting conversations",
-            extra={"snuba_params": snuba_params.to_dict()},
-        )
+
         # Step 1: Find conversation IDs with spans in the time range
         conversation_ids_results = Spans.run_table_query(
             params=snuba_params,

From 8c645e960a3df2a811e060daa623be13d53dce82 Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Tue, 28 Oct 2025 10:43:26 +0100
Subject: [PATCH 5/6] test fix

---
 .../sentry/api/endpoints/test_organization_ai_conversations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/sentry/api/endpoints/test_organization_ai_conversations.py b/tests/sentry/api/endpoints/test_organization_ai_conversations.py
index 631a06b55d241f..9c2a166349eda7 100644
--- a/tests/sentry/api/endpoints/test_organization_ai_conversations.py
+++ b/tests/sentry/api/endpoints/test_organization_ai_conversations.py
@@ -431,7 +431,7 @@ def test_flow_ordering(self) -> None:
 
     def test_complete_conversation_data_across_time_range(self) -> None:
         """Test that conversations show complete data even when spans are outside time range"""
-        now = before_now(days=90).replace(microsecond=0)
+        now = before_now(days=15).replace(microsecond=0)
         conversation_id = uuid4().hex
         trace_id = uuid4().hex
 

From a954c2a561409ffff0268c34a68031803504104a Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Tue, 28 Oct 2025 14:53:26 +0100
Subject: [PATCH 6/6] fix test

---
 .../api/endpoints/test_organization_ai_conversations.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/sentry/api/endpoints/test_organization_ai_conversations.py b/tests/sentry/api/endpoints/test_organization_ai_conversations.py
index 9c2a166349eda7..09e1a3c246804f 100644
--- a/tests/sentry/api/endpoints/test_organization_ai_conversations.py
+++ b/tests/sentry/api/endpoints/test_organization_ai_conversations.py
@@ -470,7 +470,6 @@ def test_complete_conversation_data_across_time_range(self) -> None:
 
         conversation = response.data[0]
         assert conversation["conversationId"] == conversation_id
-        assert conversation["llmCalls"] == 2
-        assert conversation["totalTokens"] == 150
-        assert conversation["totalCost"] == 0.015
-        assert conversation["duration"] > timedelta(days=6).total_seconds() * 1000
+        assert conversation["llmCalls"] == 1
+        assert conversation["totalTokens"] == 50
+        assert conversation["totalCost"] == 0.005
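
For reference, a minimal standalone sketch of the logging style the later patches converge on (a module-level stdlib logger named after the endpoint module, with a constant message and variable data passed via `extra` rather than f-string interpolation). The logger name and the message/`extra` shape are taken from patch 3; the `log_conversation_ids` helper and the `__main__` harness are illustrative only and not part of the patches.

```python
import logging

# Module-level named logger, as set up in patch 3 of the series.
logger = logging.getLogger("sentry.api.endpoints.organization_ai_conversations")


def log_conversation_ids(conversation_ids: list[str]) -> None:
    # Structured logging: keep the message constant and attach variable data
    # through `extra`, instead of interpolating values into the message string.
    # Note: the default formatter does not render `extra` fields; a structured
    # handler/formatter is needed to see them in the output.
    logger.info(
        "[ai-conversations] Got conversation IDs",
        extra={"conversation_ids": conversation_ids},
    )


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    log_conversation_ids(["abc123", "def456"])
```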