Skip to content

feat(uptime): Use EAP to query in organization_uptime_stats #94597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,8 @@ def register_temporary_features(manager: FeatureManager):
manager.add("organizations:uptime-detector-create-issues", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable sending uptime results to EAP (Events Analytics Platform)
manager.add("organizations:uptime-eap-results", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable querying uptime data from EAP uptime_results instead of uptime_checks
manager.add("organizations:uptime-eap-uptime-results-query", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
manager.add("organizations:use-metrics-layer", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
manager.add("organizations:user-feedback-ai-summaries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable auto spam classification at User Feedback ingest time
Expand Down
99 changes: 76 additions & 23 deletions src/sentry/uptime/endpoints/organization_uptime_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@
Function,
StrArray,
)
from sentry_protos.snuba.v1.trace_item_filter_pb2 import ComparisonFilter, TraceItemFilter
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
AndFilter,
ComparisonFilter,
TraceItemFilter,
)

from sentry import options
from sentry import features, options
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import StatsArgsDict, StatsMixin, region_silo_endpoint
Expand Down Expand Up @@ -74,15 +78,35 @@ def get(self, request: Request, organization: Organization) -> Response:
)

try:
eap_response = self._make_eap_request(
organization, projects, subscription_ids, timerange_args, epoch_cutoff
)
if features.has("organizations:uptime-eap-uptime-results-query", organization):
eap_response = self._make_eap_request(
organization,
projects,
subscription_ids,
timerange_args,
epoch_cutoff,
TraceItemType.TRACE_ITEM_TYPE_UPTIME_RESULT,
"guid",
"subscription_id",
include_request_sequence_filter=True,
)
formatted_response = self._format_response(eap_response, "subscription_id")
else:
eap_response = self._make_eap_request(
organization,
projects,
subscription_ids,
timerange_args,
epoch_cutoff,
TraceItemType.TRACE_ITEM_TYPE_UPTIME_CHECK,
"uptime_check_id",
"uptime_subscription_id",
)
formatted_response = self._format_response(eap_response, "uptime_subscription_id")
except Exception:
logger.exception("Error making EAP RPC request for uptime check stats")
return self.respond("error making request", status=400)

formatted_response = self._format_response(eap_response, epoch_cutoff)

# Map the response back to project uptime subscription ids
mapped_response = self._map_response_to_project_uptime_subscription_ids(
subscription_id_to_project_uptime_subscription_id, formatted_response
Expand Down Expand Up @@ -127,7 +151,7 @@ def _authorize_and_map_project_uptime_subscription_ids(
}

validated_subscription_ids = [
project_uptime_subscription[1]
str(uuid.UUID(project_uptime_subscription[1]))
for project_uptime_subscription in project_uptime_subscriptions
if project_uptime_subscription[1] is not None
]
Expand All @@ -141,6 +165,10 @@ def _make_eap_request(
subscription_ids: list[str],
timerange_args: StatsArgsDict,
epoch_cutoff: datetime.datetime | None,
trace_item_type: TraceItemType.ValueType,
aggregation_key: str,
subscription_key: str,
include_request_sequence_filter: bool = False,
) -> TimeSeriesResponse:

eap_query_start = timerange_args["start"]
Expand All @@ -151,27 +179,56 @@ def _make_eap_request(
start_timestamp.FromDatetime(eap_query_start)
end_timestamp = Timestamp()
end_timestamp.FromDatetime(timerange_args["end"])

subscription_filter = TraceItemFilter(
comparison_filter=ComparisonFilter(
key=AttributeKey(
name=subscription_key,
type=AttributeKey.Type.TYPE_STRING,
),
op=ComparisonFilter.OP_IN,
value=AttributeValue(val_str_array=StrArray(values=subscription_ids)),
)
)

if include_request_sequence_filter:
request_sequence_filter = TraceItemFilter(
comparison_filter=ComparisonFilter(
key=AttributeKey(
name="request_sequence",
type=AttributeKey.Type.TYPE_INT,
),
op=ComparisonFilter.OP_EQUALS,
value=AttributeValue(val_int=0),
)
)
query_filter = TraceItemFilter(
and_filter=AndFilter(filters=[subscription_filter, request_sequence_filter])
)
else:
query_filter = subscription_filter

request = TimeSeriesRequest(
meta=RequestMeta(
organization_id=organization.id,
project_ids=[project.id for project in projects],
trace_item_type=TraceItemType.TRACE_ITEM_TYPE_UPTIME_CHECK,
trace_item_type=trace_item_type,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
),
aggregations=[
AttributeAggregation(
aggregate=Function.FUNCTION_COUNT,
key=AttributeKey(
name="uptime_check_id",
name=aggregation_key,
type=AttributeKey.Type.TYPE_STRING,
),
label="count()",
)
],
group_by=[
AttributeKey(
name="uptime_subscription_id",
name=subscription_key,
type=AttributeKey.Type.TYPE_STRING,
),
AttributeKey(
Expand All @@ -184,32 +241,28 @@ def _make_eap_request(
),
],
granularity_secs=timerange_args["rollup"],
filter=TraceItemFilter(
comparison_filter=ComparisonFilter(
key=AttributeKey(
name="uptime_subscription_id",
type=AttributeKey.Type.TYPE_STRING,
),
op=ComparisonFilter.OP_IN,
value=AttributeValue(val_str_array=StrArray(values=subscription_ids)),
)
),
filter=query_filter,
)
responses = timeseries_rpc([request])
assert len(responses) == 1
return responses[0]

def _format_response(
self, response: TimeSeriesResponse, epoch_cutoff: datetime.datetime | None = None
self, response: TimeSeriesResponse, subscription_key: str
) -> dict[str, list[tuple[int, dict[str, int]]]]:
"""
Formats the response from the EAP RPC request into a dictionary of subscription ids to a list of tuples
of timestamps and a dictionary of check statuses to counts.

Args:
response: The EAP RPC TimeSeriesResponse
subscription_key: The attribute name for subscription ID ("uptime_subscription_id" or "subscription_id")
epoch_cutoff: Optional cutoff timestamp for data
"""
formatted_data: dict[str, dict[int, dict[str, int]]] = {}

for timeseries in response.result_timeseries:
subscription_id = timeseries.group_by_attributes["uptime_subscription_id"]
subscription_id = timeseries.group_by_attributes[subscription_key]
status = timeseries.group_by_attributes["check_status"]
incident_status = timeseries.group_by_attributes["incident_status"]

Expand Down
108 changes: 77 additions & 31 deletions tests/sentry/uptime/endpoints/test_organization_uptime_stats.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
import uuid
from datetime import datetime, timedelta, timezone

from sentry.testutils.cases import UptimeCheckSnubaTestCase
from sentry.testutils.cases import APITestCase, UptimeCheckSnubaTestCase
from sentry.testutils.helpers.datetime import freeze_time
from sentry.testutils.helpers.options import override_options
from sentry.uptime.endpoints.organization_uptime_stats import add_extra_buckets_for_epoch_cutoff
from sentry.uptime.types import IncidentStatus
from sentry.utils import json
from tests.sentry.uptime.endpoints.test_organization_uptime_alert_index import (
OrganizationUptimeAlertIndexBaseEndpointTest,
from tests.snuba.api.endpoints.test_organization_events_uptime_results import (
UptimeResultEAPTestCase,
)

MOCK_DATETIME = datetime.now(tz=timezone.utc) - timedelta(days=1)


@freeze_time(MOCK_DATETIME)
class OrganizationUptimeCheckIndexEndpointTest(
OrganizationUptimeAlertIndexBaseEndpointTest, UptimeCheckSnubaTestCase
):
class OrganizationUptimeStatsBaseTest(APITestCase):
__test__ = False
endpoint = "sentry-api-0-organization-uptime-stats"

def setUp(self):
Expand All @@ -30,19 +28,23 @@ def setUp(self):
self.project_uptime_subscription = self.create_project_uptime_subscription(
uptime_subscription=self.subscription
)
scenarios: list[dict] = [
{"check_status": "success"},
{"check_status": "failure"},
{"check_status": "success"},
{"check_status": "failure"},
{"check_status": "success"},
{"check_status": "failure"},
{"check_status": "failure"},
{"check_status": "failure", "incident_status": IncidentStatus.IN_INCIDENT},
]

self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="success")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="failure")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="success")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="failure")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="success")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="failure")
self.store_snuba_uptime_check(subscription_id=self.subscription_id, check_status="failure")
self.store_snuba_uptime_check(
subscription_id=self.subscription_id,
check_status="failure",
incident_status=IncidentStatus.IN_INCIDENT,
)
for scenario in scenarios:
self.store_uptime_data(self.subscription_id, **scenario)

def store_uptime_data(self, subscription_id, check_status, **kwargs):
"""Store a single uptime data row. Must be implemented by subclasses."""
raise NotImplementedError("Subclasses must implement store_uptime_data")

def test_simple(self):
"""Test that the endpoint returns data for a simple uptime check."""
Expand Down Expand Up @@ -120,20 +122,15 @@ def test_simple_with_date_cutoff_rounded_resolution_past_cutoff(self):
uptime_subscription=subscription
)

self.store_snuba_uptime_check(
subscription_id=subscription_id,
check_status="success",
scheduled_check_time=(MOCK_DATETIME - timedelta(days=5)),
# Store data for the cutoff test scenario
self.store_uptime_data(
subscription_id, "success", scheduled_check_time=(MOCK_DATETIME - timedelta(days=5))
)
self.store_snuba_uptime_check(
subscription_id=subscription_id,
check_status="failure",
scheduled_check_time=MOCK_DATETIME - timedelta(days=5),
self.store_uptime_data(
subscription_id, "failure", scheduled_check_time=MOCK_DATETIME - timedelta(days=5)
)
self.store_snuba_uptime_check(
subscription_id=subscription_id,
check_status="failure",
scheduled_check_time=MOCK_DATETIME - timedelta(hours=2),
self.store_uptime_data(
subscription_id, "failure", scheduled_check_time=MOCK_DATETIME - timedelta(hours=2)
)

response = self.get_success_response(
Expand Down Expand Up @@ -230,6 +227,27 @@ def test_too_many_uptime_subscription_ids(self):
)


@freeze_time(MOCK_DATETIME)
class OrganizationUptimeCheckIndexEndpointTest(
OrganizationUptimeStatsBaseTest, UptimeCheckSnubaTestCase
):
__test__ = True

def store_uptime_data(self, subscription_id, check_status, **kwargs):
default_timestamp = datetime.now(timezone.utc) - timedelta(hours=12)
self.store_snuba_uptime_check(
subscription_id=subscription_id,
check_status=check_status,
incident_status=kwargs.get("incident_status", IncidentStatus.NO_INCIDENT),
scheduled_check_time=kwargs.get("scheduled_check_time", default_timestamp),
**{
k: v
for k, v in kwargs.items()
if k not in ["incident_status", "scheduled_check_time"]
},
)


# TODO(jferg): remove after 90 days
def test_add_extra_buckets_for_epoch_cutoff():
"""Test adding extra buckets when there's an epoch cutoff"""
Expand Down Expand Up @@ -275,3 +293,31 @@ def test_add_extra_buckets_for_epoch_cutoff():
# Test with no epoch cutoff - should return original
result = add_extra_buckets_for_epoch_cutoff(formatted_response, None, rollup, start, end)
assert result == formatted_response


@freeze_time(MOCK_DATETIME)
class OrganizationUptimeStatsEndpointWithEAPTests(
OrganizationUptimeStatsBaseTest, UptimeResultEAPTestCase, UptimeCheckSnubaTestCase
):
__test__ = True

def setUp(self):
super().setUp()
self.features = {
"organizations:uptime-eap-enabled": True,
"organizations:uptime-eap-uptime-results-query": False, # Use legacy for now
}

def store_uptime_data(self, subscription_id, check_status, **kwargs):
default_timestamp = datetime.now(timezone.utc) - timedelta(hours=12)
self.store_snuba_uptime_check(
subscription_id=subscription_id,
check_status=check_status,
incident_status=kwargs.get("incident_status", IncidentStatus.NO_INCIDENT),
scheduled_check_time=kwargs.get("scheduled_check_time", default_timestamp),
**{
k: v
for k, v in kwargs.items()
if k not in ["incident_status", "scheduled_check_time"]
},
)
Loading