From 38beb42e96f42dcc8199614bd6f994f0de2377eb Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 3 Jan 2025 10:51:56 +0000 Subject: [PATCH] OTLP: add CLI flag for 'quiet zero' (#10238) * OTLP: add CLI flag for 'quiet zero' So that we can update all ingesters before enabling this in distributors. * Use mimir-prometheus from jvp/reimplement-quiet-zeros branch * make generate-otlp * make doc * make reference-help * update mimir prometheus commit to 93fa7617c0419ba9e49c1980a153206e8bb47089 * hide flag from docs Signed-off-by: Jesus Vazquez * Introduce changelog entry --------- Signed-off-by: Jesus Vazquez Co-authored-by: Jesus Vazquez --- CHANGELOG.md | 1 + go.mod | 3 +-- go.sum | 4 ++-- pkg/api/api.go | 2 +- pkg/distributor/distributor.go | 4 ++++ pkg/distributor/otel.go | 6 ++++-- pkg/distributor/otel_test.go | 14 +++++++------- pkg/distributor/otlp/helper_generated.go | 12 ++++++++---- pkg/distributor/otlp/metrics_to_prw_generated.go | 1 + pkg/distributor/push_test.go | 2 +- .../prometheus/prometheus/model/value/value.go | 3 +++ .../otlptranslator/prometheusremotewrite/helper.go | 12 ++++++++---- .../prometheusremotewrite/metrics_to_prw.go | 1 + .../prometheus/prometheus/tsdb/head_append.go | 11 ++++++++++- .../prometheus/prometheus/tsdb/head_read_mimir.go | 7 +++++++ .../prometheus/prometheus/tsdb/head_wal.go | 7 +++++++ vendor/modules.txt | 4 ++-- 17 files changed, 68 insertions(+), 26 deletions(-) create mode 100644 vendor/github.com/prometheus/prometheus/tsdb/head_read_mimir.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c7e16d25f6..c6d542b8ee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * [ENHANCEMENT] Distributor: Add native histogram support for `electedReplicaPropagationTime` metric in ha_tracker. #10264 * [ENHANCEMENT] Ingester: More efficient CPU/memory utilization-based read request limiting. 
#10325 * [ENHANCEMENT] Dashboards: Add Query-Scheduler <-> Querier Inflight Requests row to Query Reads and Remote Ruler reads dashboards. #10290 +* [ENHANCEMENT] OTLP: In addition to the flag `-distributor.otel-created-timestamp-zero-ingestion-enabled`, there is now `-distributor.otel-start-time-quiet-zero`, which converts OTel start timestamps to Prometheus QuietZeroNaNs. It is a separate flag so that the change can be rolled out safely: upgrade all ingesters first, then enable it on distributors. #10238 * [BUGFIX] Distributor: Use a boolean to track changes while merging the ReplicaDesc components, rather than comparing the objects directly. #10185 * [BUGFIX] Querier: fix timeout responding to query-frontend when response size is very close to `-querier.frontend-client.grpc-max-send-msg-size`. #10154 * [BUGFIX] Query-frontend and querier: show warning/info annotations in some cases where they were missing (if a lazy querier was used). #10277 diff --git a/go.mod b/go.mod index fcefa33f397..fe8f7bf2a9f 100644 --- a/go.mod +++ b/go.mod @@ -286,8 +286,7 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -// Using a fork of Prometheus with Mimir-specific changes. 
-replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241219104229-b50052711673 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250102152619-93fa7617c041 // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index 7c983d07a84..5a26457ed85 100644 --- a/go.sum +++ b/go.sum @@ -1283,8 +1283,8 @@ github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40 h1:1TeKhyS+pvzO github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40/go.mod h1:IGRj8oOoxwJbHBYl1+OhS9UjQR0dv6SQOep7HqmtyFU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20241219104229-b50052711673 h1:z3nSCBMtEMtD/LAIkwrHsT03n7qgeU+0M6rEMZQbxVI= -github.com/grafana/mimir-prometheus v0.0.0-20241219104229-b50052711673/go.mod h1:a5LEa2Vy87wOp0Vu6sLmEIR1V59fqH3QosOSiErAr30= +github.com/grafana/mimir-prometheus v0.0.0-20250102152619-93fa7617c041 h1:tZFQRbiyOW630aJ7r+p+N3kUWhMVeWLRGSjZsK9KA0s= +github.com/grafana/mimir-prometheus v0.0.0-20250102152619-93fa7617c041/go.mod h1:a5LEa2Vy87wOp0Vu6sLmEIR1V59fqH3QosOSiErAr30= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= github.com/grafana/prometheus-alertmanager v0.25.1-0.20240930132144-b5e64e81e8d3 h1:6D2gGAwyQBElSrp3E+9lSr7k8gLuP3Aiy20rweLWeBw= diff --git a/pkg/api/api.go b/pkg/api/api.go index fd8cdd261f4..bd8da8e6e80 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -267,7 +267,7 @@ func (a *API) RegisterDistributor(d *distributor.Distributor, 
pushConfig distrib ), true, false, "POST") a.RegisterRoute(OTLPPushEndpoint, distributor.OTLPHandler( pushConfig.MaxOTLPRequestSize, d.RequestBufferPool, a.sourceIPs, limits, pushConfig.OTelResourceAttributePromotionConfig, - pushConfig.RetryConfig, d.PushWithMiddlewares, d.PushMetrics, reg, a.logger, + pushConfig.RetryConfig, pushConfig.EnableStartTimeQuietZero, d.PushWithMiddlewares, d.PushMetrics, reg, a.logger, ), true, false, "POST") a.indexPage.AddLinks(defaultWeight, "Distributor", []IndexPageLink{ diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 41bf16ddb43..4e55b298ef7 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -249,6 +249,9 @@ type Config struct { // OTelResourceAttributePromotionConfig allows for specializing OTel resource attribute promotion. OTelResourceAttributePromotionConfig OTelResourceAttributePromotionConfig `yaml:"-"` + + // Change the implementation of OTel startTime from a real zero to a special NaN value. + EnableStartTimeQuietZero bool `yaml:"start_time_quiet_zero" category:"advanced" doc:"hidden"` } // PushWrapper wraps around a push. It is similar to middleware.Interface. @@ -267,6 +270,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.RemoteTimeout, "distributor.remote-timeout", 2*time.Second, "Timeout for downstream ingesters.") f.BoolVar(&cfg.WriteRequestsBufferPoolingEnabled, "distributor.write-requests-buffer-pooling-enabled", true, "Enable pooling of buffers used for marshaling write requests.") f.IntVar(&cfg.ReusableIngesterPushWorkers, "distributor.reusable-ingester-push-workers", 2000, "Number of pre-allocated workers used to forward push requests to the ingesters. If 0, no workers will be used and a new goroutine will be spawned for each ingester push request. If not enough workers available, new goroutine will be spawned. 
(Note: this is a performance optimization, not a limiting feature.)") + f.BoolVar(&cfg.EnableStartTimeQuietZero, "distributor.otel-start-time-quiet-zero", false, "Change the implementation of OTel startTime from a real zero to a special NaN value.") cfg.DefaultLimits.RegisterFlags(f) } diff --git a/pkg/distributor/otel.go b/pkg/distributor/otel.go index 1f4621b8446..0c3570913e1 100644 --- a/pkg/distributor/otel.go +++ b/pkg/distributor/otel.go @@ -63,6 +63,7 @@ func OTLPHandler( limits OTLPHandlerLimits, resourceAttributePromotionConfig OTelResourceAttributePromotionConfig, retryCfg RetryConfig, + enableStartTimeQuietZero bool, push PushFunc, pushMetrics *PushMetrics, reg prometheus.Registerer, @@ -183,7 +184,7 @@ func OTLPHandler( pushMetrics.ObserveUncompressedBodySize(tenantID, float64(uncompressedBodySize)) var metrics []mimirpb.PreallocTimeseries - metrics, err = otelMetricsToTimeseries(ctx, tenantID, addSuffixes, enableCTZeroIngestion, promoteResourceAttributes, keepIdentifyingResourceAttributes, discardedDueToOtelParseError, spanLogger, otlpReq.Metrics()) + metrics, err = otelMetricsToTimeseries(ctx, tenantID, addSuffixes, enableCTZeroIngestion, enableStartTimeQuietZero, promoteResourceAttributes, keepIdentifyingResourceAttributes, discardedDueToOtelParseError, spanLogger, otlpReq.Metrics()) if err != nil { return err } @@ -413,11 +414,12 @@ func otelMetricsToMetadata(addSuffixes bool, md pmetric.Metrics) []*mimirpb.Metr return metadata } -func otelMetricsToTimeseries(ctx context.Context, tenantID string, addSuffixes, enableCTZeroIngestion bool, promoteResourceAttributes []string, keepIdentifyingResourceAttributes bool, discardedDueToOtelParseError *prometheus.CounterVec, logger log.Logger, md pmetric.Metrics) ([]mimirpb.PreallocTimeseries, error) { +func otelMetricsToTimeseries(ctx context.Context, tenantID string, addSuffixes, enableCTZeroIngestion, enableStartTimeQuietZero bool, promoteResourceAttributes []string, keepIdentifyingResourceAttributes bool, 
discardedDueToOtelParseError *prometheus.CounterVec, logger log.Logger, md pmetric.Metrics) ([]mimirpb.PreallocTimeseries, error) { converter := otlp.NewMimirConverter() _, errs := converter.FromMetrics(ctx, md, otlp.Settings{ AddMetricSuffixes: addSuffixes, EnableCreatedTimestampZeroIngestion: enableCTZeroIngestion, + EnableStartTimeQuietZero: enableStartTimeQuietZero, PromoteResourceAttributes: promoteResourceAttributes, KeepIdentifyingResourceAttributes: keepIdentifyingResourceAttributes, }, utillog.SlogFromGoKit(logger)) diff --git a/pkg/distributor/otel_test.go b/pkg/distributor/otel_test.go index 614784756cb..cc9d0cc8751 100644 --- a/pkg/distributor/otel_test.go +++ b/pkg/distributor/otel_test.go @@ -283,7 +283,7 @@ func TestOTelMetricsToTimeSeries(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { mimirTS, err := otelMetricsToTimeseries( - context.Background(), tenantID, true, false, tc.promoteResourceAttributes, tc.keepIdentifyingResourceAttributes, discardedDueToOTelParseError, log.NewNopLogger(), md, + context.Background(), tenantID, true, false, false, tc.promoteResourceAttributes, tc.keepIdentifyingResourceAttributes, discardedDueToOTelParseError, log.NewNopLogger(), md, ) require.NoError(t, err) require.Len(t, mimirTS, 2) @@ -351,7 +351,7 @@ func BenchmarkOTLPHandler(b *testing.B) { validation.NewMockTenantLimits(map[string]*validation.Limits{}), ) require.NoError(b, err) - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, pushFunc, nil, nil, log.NewNopLogger()) + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, pushFunc, nil, nil, log.NewNopLogger()) b.Run("protobuf", func(b *testing.B) { req := createOTLPProtoRequest(b, exportReq, "") @@ -750,7 +750,7 @@ func TestHandlerOTLPPush(t *testing.T) { logs := &concurrency.SyncBuffer{} retryConfig := RetryConfig{Enabled: true, MinBackoff: 5 * time.Second, MaxBackoff: 5 * time.Second} - handler := OTLPHandler(tt.maxMsgSize, nil, 
nil, limits, tt.resourceAttributePromotionConfig, retryConfig, pusher, nil, nil, level.NewFilter(log.NewLogfmtLogger(logs), level.AllowInfo())) + handler := OTLPHandler(tt.maxMsgSize, nil, nil, limits, tt.resourceAttributePromotionConfig, retryConfig, false, pusher, nil, nil, level.NewFilter(log.NewLogfmtLogger(logs), level.AllowInfo())) resp := httptest.NewRecorder() handler.ServeHTTP(resp, req) @@ -823,7 +823,7 @@ func TestHandler_otlpDroppedMetricsPanic(t *testing.T) { req := createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp := httptest.NewRecorder() - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() assert.NoError(t, err) assert.Len(t, request.Timeseries, 3) @@ -869,7 +869,7 @@ func TestHandler_otlpDroppedMetricsPanic2(t *testing.T) { req := createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp := httptest.NewRecorder() - handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler := OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() t.Cleanup(pushReq.CleanUp) require.NoError(t, err) @@ -895,7 +895,7 @@ func TestHandler_otlpDroppedMetricsPanic2(t *testing.T) { req = createOTLPProtoRequest(t, pmetricotlp.NewExportRequestFromMetrics(md), "") resp = httptest.NewRecorder() - handler = OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, func(_ context.Context, pushReq *Request) error { + handler = OTLPHandler(100000, nil, nil, limits, nil, RetryConfig{}, false, func(_ context.Context, pushReq *Request) error { request, err := pushReq.WriteRequest() t.Cleanup(pushReq.CleanUp) require.NoError(t, err) @@ 
-923,7 +923,7 @@ func TestHandler_otlpWriteRequestTooBigWithCompression(t *testing.T) { resp := httptest.NewRecorder() - handler := OTLPHandler(140, nil, nil, nil, nil, RetryConfig{}, readBodyPushFunc(t), nil, nil, log.NewNopLogger()) + handler := OTLPHandler(140, nil, nil, nil, nil, RetryConfig{}, false, readBodyPushFunc(t), nil, nil, log.NewNopLogger()) handler.ServeHTTP(resp, req) assert.Equal(t, http.StatusRequestEntityTooLarge, resp.Code) body, err := io.ReadAll(resp.Body) diff --git a/pkg/distributor/otlp/helper_generated.go b/pkg/distributor/otlp/helper_generated.go index b5dd5650bd3..8fbd4b3036e 100644 --- a/pkg/distributor/otlp/helper_generated.go +++ b/pkg/distributor/otlp/helper_generated.go @@ -597,9 +597,10 @@ const defaultIntervalForStartTimestamps = int64(300_000) // handleStartTime adds a zero sample at startTs only if startTs is within validIntervalForStartTimestamps of the sample timestamp. // The reason for doing this is that PRW v1 doesn't support Created Timestamps. After switching to PRW v2's direct CT support, // make use of its direct support fort Created Timestamps instead. +// See https://github.com/prometheus/prometheus/issues/14600 for context. // See https://opentelemetry.io/docs/specs/otel/metrics/data-model/#resets-and-gaps to know more about how OTel handles // resets for cumulative metrics. 
-func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.LabelAdapter, settings Settings, typ string, value float64, logger *slog.Logger) { +func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.LabelAdapter, settings Settings, typ string, val float64, logger *slog.Logger) { if !settings.EnableCreatedTimestampZeroIngestion { return } @@ -621,10 +622,13 @@ func (c *MimirConverter) handleStartTime(startTs, ts int64, labels []mimirpb.Lab return } - logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) + logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", val) - // See https://github.com/prometheus/prometheus/issues/14600 for context. - c.addSample(&mimirpb.Sample{TimestampMs: startTs}, labels) + var createdTimeValue float64 + if settings.EnableStartTimeQuietZero { + createdTimeValue = math.Float64frombits(value.QuietZeroNaN) + } + c.addSample(&mimirpb.Sample{TimestampMs: startTs, Value: createdTimeValue}, labels) } // handleHistogramStartTime similar to the method above but for native histograms.. diff --git a/pkg/distributor/otlp/metrics_to_prw_generated.go b/pkg/distributor/otlp/metrics_to_prw_generated.go index 5eb1391dadd..e9fdbbc8964 100644 --- a/pkg/distributor/otlp/metrics_to_prw_generated.go +++ b/pkg/distributor/otlp/metrics_to_prw_generated.go @@ -50,6 +50,7 @@ type Settings struct { // Mimir specifics. 
EnableCreatedTimestampZeroIngestion bool + EnableStartTimeQuietZero bool ValidIntervalCreatedTimestampZeroIngestion time.Duration } diff --git a/pkg/distributor/push_test.go b/pkg/distributor/push_test.go index 192769b1141..0e588b978e8 100644 --- a/pkg/distributor/push_test.go +++ b/pkg/distributor/push_test.go @@ -1183,7 +1183,7 @@ func TestOTLPPushHandlerErrorsAreReportedCorrectlyViaHttpgrpc(t *testing.T) { return nil } - h := OTLPHandler(200, util.NewBufferPool(0), nil, otlpLimitsMock{}, nil, RetryConfig{}, push, newPushMetrics(reg), reg, log.NewNopLogger()) + h := OTLPHandler(200, util.NewBufferPool(0), nil, otlpLimitsMock{}, nil, RetryConfig{}, false, push, newPushMetrics(reg), reg, log.NewNopLogger()) srv.HTTP.Handle("/otlp", h) // start the server diff --git a/vendor/github.com/prometheus/prometheus/model/value/value.go b/vendor/github.com/prometheus/prometheus/model/value/value.go index 655ce852d51..d3dd9b996fe 100644 --- a/vendor/github.com/prometheus/prometheus/model/value/value.go +++ b/vendor/github.com/prometheus/prometheus/model/value/value.go @@ -26,6 +26,9 @@ const ( // complicated values in the future. It is 2 rather than 1 to make // it easier to distinguish from the NormalNaN by a human when debugging. StaleNaN uint64 = 0x7ff0000000000002 + + // QuietZeroNaN signals TSDB to add a zero, but do nothing if there is already a value at that timestamp. + QuietZeroNaN uint64 = 0x7ff0000000000003 ) // IsStaleNaN returns true when the provided NaN value is a stale marker. 
diff --git a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 4f12d1f3470..82f3bb591ec 100644 --- a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -595,9 +595,10 @@ const defaultIntervalForStartTimestamps = int64(300_000) // handleStartTime adds a zero sample at startTs only if startTs is within validIntervalForStartTimestamps of the sample timestamp. // The reason for doing this is that PRW v1 doesn't support Created Timestamps. After switching to PRW v2's direct CT support, // make use of its direct support fort Created Timestamps instead. +// See https://github.com/prometheus/prometheus/issues/14600 for context. // See https://opentelemetry.io/docs/specs/otel/metrics/data-model/#resets-and-gaps to know more about how OTel handles // resets for cumulative metrics. -func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, value float64, logger *slog.Logger) { +func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, val float64, logger *slog.Logger) { if !settings.EnableCreatedTimestampZeroIngestion { return } @@ -619,10 +620,13 @@ func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb return } - logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) + logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", val) - // See https://github.com/prometheus/prometheus/issues/14600 for context. 
- c.addSample(&prompb.Sample{Timestamp: startTs}, labels) + var createdTimeValue float64 + if settings.EnableStartTimeQuietZero { + createdTimeValue = math.Float64frombits(value.QuietZeroNaN) + } + c.addSample(&prompb.Sample{Timestamp: startTs, Value: createdTimeValue}, labels) } // handleHistogramStartTime similar to the method above but for native histograms.. diff --git a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 7f0cc04a106..65fd0800476 100644 --- a/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/vendor/github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -47,6 +47,7 @@ type Settings struct { // Mimir specifics. EnableCreatedTimestampZeroIngestion bool + EnableStartTimeQuietZero bool ValidIntervalCreatedTimestampZeroIngestion time.Duration } diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_append.go b/vendor/github.com/prometheus/prometheus/tsdb/head_append.go index b64607a417e..a4def2bc918 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_append.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_append.go @@ -497,7 +497,7 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi if s.lastHistogramValue != nil || s.lastFloatHistogramValue != nil { return false, 0, storage.NewDuplicateHistogramToFloatErr(t, v) } - if math.Float64bits(s.lastValue) != math.Float64bits(v) { + if math.Float64bits(s.lastValue) != math.Float64bits(v) && math.Float64bits(v) != value.QuietZeroNaN { return false, 0, storage.NewDuplicateFloatErr(t, s.lastValue, v) } // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. 
@@ -505,6 +505,10 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi } } + if math.Float64bits(v) == value.QuietZeroNaN { // Say it's allowed; it will be dropped later in commitSamples. + return true, 0, nil + } + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { return true, headMaxt - t, nil @@ -1144,6 +1148,8 @@ func (a *headAppender) commitSamples(acc *appenderCommitContext) { switch { case err != nil: // Do nothing here. + case oooSample && math.Float64bits(s.V) == value.QuietZeroNaN: + // No-op: we don't store quiet zeros out-of-order. case oooSample: // Sample is OOO and OOO handling is enabled // and the delta is within the OOO tolerance. @@ -1190,6 +1196,9 @@ func (a *headAppender) commitSamples(acc *appenderCommitContext) { acc.floatsAppended-- } default: + if math.Float64bits(s.V) == value.QuietZeroNaN { + s.V = 0 // Note that this is modifying the copy which is what will be appended but the WAL got the NaN already. 
+ } ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts) if ok { if s.T < acc.inOrderMint { diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_read_mimir.go b/vendor/github.com/prometheus/prometheus/tsdb/head_read_mimir.go new file mode 100644 index 00000000000..7d9c765fdb5 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_read_mimir.go @@ -0,0 +1,7 @@ +package tsdb + +import "math" + +func (h *Head) MustIndex() IndexReader { + return h.indexRange(math.MinInt64, math.MaxInt64) +} diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go index b1f3abd1545..5b1a868837a 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" @@ -589,6 +590,9 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp if s.T <= ms.mmMaxTime { continue } + if math.Float64bits(s.V) == value.QuietZeroNaN { + s.V = 0 + } if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -989,6 +993,9 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHi unknownRefs++ continue } + if math.Float64bits(s.V) == value.QuietZeroNaN { + continue + } ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger) if chunkCreated { h.metrics.chunksCreated.Inc() diff --git a/vendor/modules.txt b/vendor/modules.txt index 180a59033de..05717732ad5 100644 --- a/vendor/modules.txt +++ 
b/vendor/modules.txt @@ -1039,7 +1039,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20241219104229-b50052711673 +# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20250102152619-93fa7617c041 ## explicit; go 1.22.0 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1711,7 +1711,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20241219104229-b50052711673 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250102152619-93fa7617c041 # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 # github.com/grafana/regexp => github.com/grafana/regexp v0.0.0-20240531075221-3685f1377d7b