-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
fix(ai-insights): remove sampling mode from query #102068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
3d50800
4bcbb24
a6e9dae
b3ff227
9081306
0a83063
8c645e9
c431649
2e1fa51
f730b66
a954c2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,4 @@ | ||
| import dataclasses | ||
| from collections import defaultdict | ||
| from datetime import datetime | ||
|
|
||
| from rest_framework import serializers | ||
| from rest_framework.request import Request | ||
|
|
@@ -17,6 +15,7 @@ | |
| from sentry.search.eap.types import SearchResolverConfig | ||
| from sentry.snuba.referrer import Referrer | ||
| from sentry.snuba.spans_rpc import Spans | ||
| from sentry.utils import json, logger | ||
|
|
||
|
|
||
| class OrganizationAIConversationsSerializer(serializers.Serializer): | ||
|
|
@@ -110,6 +109,10 @@ def _get_conversations( | |
| _sort: Sort field and direction (currently only supports timestamp sorting, unused for now) | ||
| _query: Search query (not yet implemented) | ||
| """ | ||
| logger.info( | ||
| "[ai-conversations] Getting conversations", | ||
| extra={"snuba_params": snuba_params.to_dict()}, | ||
| ) | ||
| # Step 1: Find conversation IDs with spans in the time range | ||
| conversation_ids_results = Spans.run_table_query( | ||
| params=snuba_params, | ||
|
|
@@ -123,9 +126,11 @@ def _get_conversations( | |
| limit=limit, | ||
| referrer=Referrer.API_AI_CONVERSATIONS.value, | ||
| config=SearchResolverConfig(auto_fields=True), | ||
| sampling_mode="HIGHEST_ACCURACY", | ||
| sampling_mode=None, | ||
obostjancic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
|
Comment on lines
126
to
130
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: First query uses inconsistent sampling mode. 🔍 Detailed Analysis: The initial query for discovering AI conversation IDs uses a different sampling mode. 💡 Suggested Fix: Change the sampling mode so both queries are consistent. 🤖 Prompt for AI Agent: Did we get this right? 👍 / 👎 to inform future reviews. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
While it is true that with |
||
|
|
||
| logger.info("[ai-conversations] Got Conversation IDs results: {conversation_ids_results}") | ||
obostjancic marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| conversation_ids: list[str] = [ | ||
| conv_id | ||
| for row in conversation_ids_results.get("data", []) | ||
|
|
@@ -135,15 +140,19 @@ def _get_conversations( | |
| if not conversation_ids: | ||
| return [] | ||
|
|
||
| # Step 2: Get complete aggregations for these conversations (all time) | ||
| all_time_params = dataclasses.replace( | ||
| snuba_params, | ||
| start=datetime(2020, 1, 1), | ||
| end=datetime(2100, 1, 1), | ||
| ) | ||
| # # Step 2: Get complete aggregations for these conversations (all time) | ||
| # all_time_params = dataclasses.replace( | ||
| # snuba_params, | ||
| # start=datetime(2020, 1, 1), | ||
| # end=datetime(2100, 1, 1), | ||
| # ) | ||
|
|
||
| logger.info( | ||
| "[ai-conversations] Getting complete aggregations for conversations", | ||
| extra={"conversation_ids": conversation_ids}, | ||
| ) | ||
| results = Spans.run_table_query( | ||
| params=all_time_params, | ||
| params=snuba_params, | ||
| query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]", | ||
| selected_columns=[ | ||
| "gen_ai.conversation.id", | ||
|
|
@@ -164,6 +173,11 @@ def _get_conversations( | |
| sampling_mode="HIGHEST_ACCURACY", | ||
| ) | ||
|
|
||
| logger.info( | ||
| "[ai-conversations] Got complete aggregations for conversations", | ||
| extra={"results": json.dumps(results)}, | ||
| ) | ||
|
|
||
| # Create a map of conversation data by ID | ||
| conversations_map = {} | ||
| for row in results.get("data", []): | ||
|
|
@@ -187,6 +201,11 @@ def _get_conversations( | |
| "traceIds": [], | ||
| } | ||
|
|
||
| logger.info( | ||
| "[ai-conversations] Got conversations map", | ||
| extra={"conversations_map": json.dumps(conversations_map)}, | ||
| ) | ||
|
|
||
| # Preserve the order from step 1 | ||
| conversations = [ | ||
| conversations_map[conv_id] | ||
|
|
@@ -195,7 +214,7 @@ def _get_conversations( | |
| ] | ||
|
|
||
| if conversations: | ||
| self._enrich_conversations(all_time_params, conversations) | ||
| self._enrich_conversations(snuba_params, conversations) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Conversation Data Incomplete After Time Range Change. The change from |
||
|
|
||
| return conversations | ||
|
|
||
|
|
@@ -206,6 +225,10 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None | |
| conversation_ids = [conv["conversationId"] for conv in conversations] | ||
|
|
||
| # Query all spans for these conversations to get both agent flows and trace IDs | ||
| logger.info( | ||
| "[ai-conversations] Enriching conversations", | ||
| extra={"conversation_ids": conversation_ids}, | ||
| ) | ||
| all_spans_results = Spans.run_table_query( | ||
| params=snuba_params, | ||
| query_string=f"gen_ai.conversation.id:[{','.join(conversation_ids)}]", | ||
|
|
@@ -224,10 +247,17 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None | |
| config=SearchResolverConfig(auto_fields=True), | ||
| sampling_mode="HIGHEST_ACCURACY", | ||
| ) | ||
| logger.info( | ||
| "[ai-conversations] Got all spans results", | ||
| extra={"all_spans_results": json.dumps(all_spans_results)}, | ||
| ) | ||
|
|
||
| flows_by_conversation = defaultdict(list) | ||
| traces_by_conversation = defaultdict(set) | ||
|
|
||
| logger.info( | ||
| "[ai-conversations] Collecting traces and flows", | ||
| extra={"all_spans_results": json.dumps(all_spans_results)}, | ||
| ) | ||
obostjancic marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| for row in all_spans_results.get("data", []): | ||
| conv_id = row.get("gen_ai.conversation.id", "") | ||
| if not conv_id: | ||
|
|
@@ -248,3 +278,8 @@ def _enrich_conversations(self, snuba_params, conversations: list[dict]) -> None | |
| conv_id = conversation["conversationId"] | ||
| conversation["flow"] = flows_by_conversation.get(conv_id, []) | ||
| conversation["traceIds"] = list(traces_by_conversation.get(conv_id, set())) | ||
|
|
||
| logger.info( | ||
| "[ai-conversations] Enriched conversations", | ||
| extra={"conversations": json.dumps(conversations)}, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be imported from the Python standard library, right?