diff --git a/backend.go b/backend.go index 75f0071..b994880 100644 --- a/backend.go +++ b/backend.go @@ -161,10 +161,12 @@ func (b *Backend) fetchActiveTickets(ctx context.Context, limit int64) ([]*pb.Ti if err != nil { return nil, fmt.Errorf("failed to fetch active ticket IDs: %w", err) } - if len(activeTicketIDs) == 0 { + activeTicketCount := int64(len(activeTicketIDs)) + b.metrics.recordTicketCountActive(ctx, activeTicketCount) + if activeTicketCount == 0 { return nil, nil } - if len(activeTicketIDs) > int(limit) { + if activeTicketCount > limit { activeTicketIDs = activeTicketIDs[:limit] } tickets, err := b.store.GetTickets(ctx, activeTicketIDs) diff --git a/docs/metrics.md b/docs/metrics.md index 2b1bbd8..253e796 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -4,14 +4,15 @@ minimatch Backend exposes metrics in OpenTelemetry format to help monitor perfor ## Metrics list -| Metrics Name | Type | Description | -|:--------------------------------------------|:----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `minimatch.backend.tickets_fetched` | Counter | Number of times Ticket has been fetched by backends. | -| `minimatch.backend.tickets_assigned` | Counter | Number of times match has been assigned to a Ticket by backends. If this value is extremely less than `minimatch.backend.tickets_fetched`, the matchmaking logic may be undesirable. | -| `minimatch.backend.fetch_tickets_latency` | Histogram | Latency of the time the Ticket has been fetched by backends. If this value is slow, you may have a Redis performance problem or a lock conflict with assign tickets or other backends. | -| `minimatch.backend.match_function_latency` | Histogram | Latency of Match Function calls. | -| `minimatch.backend.assigner_latency` | Histogram | Latency of Assigner calls. 
| -| `minimatch.backend.assign_to_redis_latency` | Histogram | Latency to write Assign results to Redis. If this value is slow, you may have a Redis performance problem or a lock conflict with tickets_fetched or other backends. | +| Metrics Name | Type | Description | +|:--------------------------------------------|:--------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `minimatch.backend.tickets.count` | UpDownCounter | Total number of tickets. **Do not sum** this counter, as a single backend counts all tickets. | +| `minimatch.backend.tickets_fetched` | Counter | Number of times Ticket has been fetched by backends. | +| `minimatch.backend.tickets_assigned` | Counter | Number of times match has been assigned to a Ticket by backends. If this value is far lower than `minimatch.backend.tickets_fetched`, the matchmaking logic may not be behaving as desired. | +| `minimatch.backend.fetch_tickets_latency` | Histogram | Latency of Ticket fetches by backends. If this value is high, you may have a Redis performance problem or a lock conflict with assign tickets or other backends. | +| `minimatch.backend.match_function_latency` | Histogram | Latency of Match Function calls. | +| `minimatch.backend.assigner_latency` | Histogram | Latency of Assigner calls. | +| `minimatch.backend.assign_to_redis_latency` | Histogram | Latency to write Assign results to Redis. If this value is high, you may have a Redis performance problem or a lock conflict with tickets_fetched or other backends. 
| ## Meter provider diff --git a/metrics.go b/metrics.go index 5c9865f..2df9603 100644 --- a/metrics.go +++ b/metrics.go @@ -2,6 +2,7 @@ package minimatch import ( "context" + "sync/atomic" "time" "go.opentelemetry.io/otel/attribute" @@ -18,16 +19,21 @@ var ( defaultHistogramBuckets = []float64{ .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, } + keyTicketStatus = attribute.Key("status") + attributeActiveTicket = keyTicketStatus.String("active") ) type backendMetrics struct { meter metric.Meter ticketsFetched metric.Int64Counter ticketsAssigned metric.Int64Counter + ticketCount metric.Int64ObservableUpDownCounter fetchTicketsLatency metric.Float64Histogram matchFunctionLatency metric.Float64Histogram assignerLatency metric.Float64Histogram assignToRedisLatency metric.Float64Histogram + + ticketCountActive atomic.Int64 } func newBackendMetrics(provider metric.MeterProvider) (*backendMetrics, error) { @@ -64,7 +70,7 @@ func newBackendMetrics(provider metric.MeterProvider) (*backendMetrics, error) { if err != nil { return nil, err } - return &backendMetrics{ + metrics := &backendMetrics{ meter: meter, ticketsFetched: ticketsFetched, ticketsAssigned: ticketsAssigned, @@ -72,7 +78,18 @@ func newBackendMetrics(provider metric.MeterProvider) (*backendMetrics, error) { matchFunctionLatency: matchFunctionLatency, assignerLatency: assignerLatency, assignToRedisLatency: assignToRedisLatency, - }, nil + } + ticketCount, err := meter.Int64ObservableUpDownCounter("minimatch.backend.tickets.count", + metric.WithDescription("Total number of tickets. 
Do not sum this counter, as a single backend counts all tickets."), + metric.WithInt64Callback(func(ctx context.Context, o metric.Int64Observer) error { + o.Observe(metrics.ticketCountActive.Load(), metric.WithAttributes(attributeActiveTicket)) + return nil + })) + if err != nil { + return nil, err + } + metrics.ticketCount = ticketCount + return metrics, nil } func (m *backendMetrics) recordMatchFunctionLatency(ctx context.Context, seconds float64, matchProfile *pb.MatchProfile) { @@ -99,6 +116,10 @@ func (m *backendMetrics) recordAssignToRedisLatency(ctx context.Context, latency m.assignToRedisLatency.Record(ctx, latency.Seconds()) } +func (m *backendMetrics) recordTicketCountActive(ctx context.Context, count int64) { + m.ticketCountActive.Store(count) +} + type matchFunctionWithMetrics struct { mmf MatchFunction metrics *backendMetrics diff --git a/pkg/statestore/redis.go b/pkg/statestore/redis.go index f0938fe..1d1f349 100644 --- a/pkg/statestore/redis.go +++ b/pkg/statestore/redis.go @@ -156,7 +156,7 @@ func (s *RedisStore) GetAssignment(ctx context.Context, ticketID string) (*pb.As return s.getAssignment(ctx, redis, ticketID) } -// The ActiveTicketIDs may still contain the ID of a ticket that was deleted by TTL. +// GetActiveTicketIDs may also return the IDs of tickets that were already deleted by TTL. // This is because the ticket index and Ticket data are stored in separate keys. // The next `GetTicket` or `GetTickets` call will resolve this inconsistency. func (s *RedisStore) GetActiveTicketIDs(ctx context.Context) ([]string, error) {