From c849fd5909992e12afcd6195cc35bbade088c235 Mon Sep 17 00:00:00 2001 From: Nick Pillitteri <56quarters@users.noreply.github.com> Date: Wed, 22 Jan 2025 19:17:36 -0500 Subject: [PATCH] Add visibility when subqueries are rewritten (#10502) * Add visibility when subqueries are rewritten Follow up to #10445, add a metric when queries are rewritten for compatibility with Prometheus 3 range selector semantics. Signed-off-by: Nick Pillitteri * Skip TestSampleTracker_Concurrency since it is not reliable Signed-off-by: Nick Pillitteri --------- Signed-off-by: Nick Pillitteri --- CHANGELOG.md | 2 +- pkg/costattribution/sample_tracker_test.go | 3 +++ .../querymiddleware/prom2_range_compat.go | 26 ++++++++++++++----- .../prom2_range_compat_test.go | 3 ++- pkg/frontend/querymiddleware/roundtrip.go | 2 +- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbfdff3cae..6da31cafde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ * [BUGFIX] PromQL: Fix functions with histograms https://github.com/prometheus/prometheus/pull/15711 #10400 * [BUGFIX] MQE: Fix functions with histograms #10400 * [BUGFIX] Distributor: return HTTP status 415 Unsupported Media Type instead of 200 Success for Remote Write 2.0 until we support it. #10423 -* [BUGFIX] Query-frontend: Add flag `-query-frontend.prom2-range-compat` and corresponding YAML to rewrite queries with ranges that worked in Prometheus 2 but are invalid in Prometheus 3. #10445 #10461 +* [BUGFIX] Query-frontend: Add flag `-query-frontend.prom2-range-compat` and corresponding YAML to rewrite queries with ranges that worked in Prometheus 2 but are invalid in Prometheus 3. #10445 #10461 #10502 * [BUGFIX] Distributor: Fix edge case at the HA-tracker with memberlist as KVStore, where when a replica in the KVStore is marked as deleted but not yet removed, it fails to update the KVStore. #10443 ### Mixin diff --git a/pkg/costattribution/sample_tracker_test.go b/pkg/costattribution/sample_tracker_test.go index 0ad22a1edf..b68d4eff67 100644 --- a/pkg/costattribution/sample_tracker_test.go +++ b/pkg/costattribution/sample_tracker_test.go @@ -122,6 +122,9 @@ func TestSampleTracker_inactiveObservations(t *testing.T) { } func TestSampleTracker_Concurrency(t *testing.T) { + // Disabled due to spurious failures. See https://github.com/grafana/mimir/issues/10482 + t.Skip() + m := newTestManager() st := m.SampleTracker("user1") diff --git a/pkg/frontend/querymiddleware/prom2_range_compat.go b/pkg/frontend/querymiddleware/prom2_range_compat.go index 9a53ccabc4..7ccfc281ff 100644 --- a/pkg/frontend/querymiddleware/prom2_range_compat.go +++ b/pkg/frontend/querymiddleware/prom2_range_compat.go @@ -7,6 +7,8 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/tenant" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/promql/parser" apierror "github.com/grafana/mimir/pkg/api/error" @@ -17,20 +19,27 @@ import ( // newProm2RangeCompatMiddleware creates a new query middleware that adjusts range and resolution // selectors in subqueries in some cases for compatibility with Prometheus 3. -func newProm2RangeCompatMiddleware(limits Limits, logger log.Logger) MetricsQueryMiddleware { +func newProm2RangeCompatMiddleware(limits Limits, logger log.Logger, reg prometheus.Registerer) MetricsQueryMiddleware { + rewritten := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_frontend_prom2_range_compat_rewritten_total", + Help: "The number of times a query was rewritten for Prometheus 2/3 range selector compatibility", + }, []string{"user"}) + return MetricsQueryMiddlewareFunc(func(next MetricsQueryHandler) MetricsQueryHandler { return &prom2RangeCompatHandler{ - next: next, - limits: limits, - logger: logger, + next: next, + limits: limits, + logger: logger, + rewritten: rewritten, } }) } type prom2RangeCompatHandler struct { - next MetricsQueryHandler - limits Limits - logger log.Logger + next MetricsQueryHandler + limits Limits + logger log.Logger + rewritten *prometheus.CounterVec } func (c *prom2RangeCompatHandler) Do(ctx context.Context, r MetricsQueryRequest) (Response, error) { @@ -52,8 +61,11 @@ func (c *prom2RangeCompatHandler) Do(ctx context.Context, r MetricsQueryRequest) rewrittenQuery := rewritten.String() if origQuery != rewrittenQuery { + tenantIDString := tenant.JoinTenantIDs(tenantIDs) + c.rewritten.WithLabelValues(tenantIDString).Inc() spanLog.DebugLog( "msg", "modified subquery for compatibility with Prometheus 3 range selectors", + "tenants", tenantIDString, "original", origQuery, "rewritten", rewrittenQuery, ) diff --git a/pkg/frontend/querymiddleware/prom2_range_compat_test.go b/pkg/frontend/querymiddleware/prom2_range_compat_test.go index 745fdd0f98..66cf73ee9f 100644 --- a/pkg/frontend/querymiddleware/prom2_range_compat_test.go +++ b/pkg/frontend/querymiddleware/prom2_range_compat_test.go @@ -8,6 +8,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/user" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -23,7 +24,7 @@ func TestProm2RangeCompat_Do(t *testing.T) { } runHandler := func(ctx context.Context, inner MetricsQueryHandler, limits Limits, req MetricsQueryRequest) (Response, error) { - middleware := newProm2RangeCompatMiddleware(limits, log.NewNopLogger()) + middleware := newProm2RangeCompatMiddleware(limits, log.NewNopLogger(), prometheus.NewPedanticRegistry()) handler := middleware.Wrap(inner) return handler.Do(ctx, req) } diff --git a/pkg/frontend/querymiddleware/roundtrip.go b/pkg/frontend/querymiddleware/roundtrip.go index a67e1f5d43..e257e5b4dd 100644 --- a/pkg/frontend/querymiddleware/roundtrip.go +++ b/pkg/frontend/querymiddleware/roundtrip.go @@ -357,7 +357,7 @@ func newQueryMiddlewares( metrics := newInstrumentMiddlewareMetrics(registerer) queryBlockerMiddleware := newQueryBlockerMiddleware(limits, log, registerer) queryStatsMiddleware := newQueryStatsMiddleware(registerer, engine) - prom2CompatMiddleware := newProm2RangeCompatMiddleware(limits, log) + prom2CompatMiddleware := newProm2RangeCompatMiddleware(limits, log, registerer) remoteReadMiddleware = append(remoteReadMiddleware, // Track query range statistics. Added first before any subsequent middleware modifies the request.