Skip to content

Commit

Permalink
backend: add total active tickets metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
castaneai committed Mar 12, 2024
1 parent 82dc9a0 commit 0e763d2
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
6 changes: 4 additions & 2 deletions backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,12 @@ func (b *Backend) fetchActiveTickets(ctx context.Context, limit int64) ([]*pb.Ti
if err != nil {
return nil, fmt.Errorf("failed to fetch active ticket IDs: %w", err)
}
if len(activeTicketIDs) == 0 {
activeTicketCount := int64(len(activeTicketIDs))
b.metrics.recordTicketCountActive(ctx, activeTicketCount)
if activeTicketCount == 0 {
return nil, nil
}
if len(activeTicketIDs) > int(limit) {
if activeTicketCount > limit {
activeTicketIDs = activeTicketIDs[:limit]
}
tickets, err := b.store.GetTickets(ctx, activeTicketIDs)
Expand Down
17 changes: 9 additions & 8 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ minimatch Backend exposes metrics in OpenTelemetry format to help monitor perfor

## Metrics list

| Metrics Name | Type | Description |
|:--------------------------------------------|:----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `minimatch.backend.tickets_fetched` | Counter | Number of times Ticket has been fetched by backends. |
| `minimatch.backend.tickets_assigned` | Counter | Number of times match has been assigned to a Ticket by backends. If this value is extremely less than `minimatch.backend.tickets_fetched`, the matchmaking logic may be undesirable. |
| `minimatch.backend.fetch_tickets_latency` | Histogram | Latency of the time the Ticket has been fetched by backends. If this value is slow, you may have a Redis performance problem or a lock conflict with assign tickets or other backends. |
| `minimatch.backend.match_function_latency` | Histogram | Latency of Match Function calls. |
| `minimatch.backend.assigner_latency` | Histogram | Latency of Assigner calls. |
| `minimatch.backend.assign_to_redis_latency` | Histogram | Latency to write Assign results to Redis. If this value is slow, you may have a Redis performance problem or a lock conflict with tickets_fetched or other backends. |
| Metrics Name | Type | Description |
|:--------------------------------------------|:--------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `minimatch.backend.tickets.count` | UpDownCounter | Total number of tickets, reported with a `status` attribute (currently only `active`). **Do not sum** this value across backends, as a single backend already counts all tickets. |
| `minimatch.backend.tickets_fetched` | Counter | Number of times Ticket has been fetched by backends. |
| `minimatch.backend.tickets_assigned` | Counter | Number of times match has been assigned to a Ticket by backends. If this value is much lower than `minimatch.backend.tickets_fetched`, the matchmaking logic may need to be reviewed. |
| `minimatch.backend.fetch_tickets_latency` | Histogram | Time taken by backends to fetch Tickets. If this value is high, you may have a Redis performance problem or lock contention with ticket assignment or other backends. |
| `minimatch.backend.match_function_latency` | Histogram | Latency of Match Function calls. |
| `minimatch.backend.assigner_latency` | Histogram | Latency of Assigner calls. |
| `minimatch.backend.assign_to_redis_latency` | Histogram | Latency to write Assign results to Redis. If this value is high, you may have a Redis performance problem or lock contention with ticket fetching or other backends. |

## Meter provider

Expand Down
25 changes: 23 additions & 2 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package minimatch

import (
"context"
"sync/atomic"
"time"

"go.opentelemetry.io/otel/attribute"
Expand All @@ -18,16 +19,21 @@ var (
defaultHistogramBuckets = []float64{
.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10,
}
keyTicketStatus = attribute.Key("status")
attributeActiveTicket = keyTicketStatus.String("active")
)

// backendMetrics bundles the metric instruments used by the backend together
// with the state that backs its observable instrument.
//
// It embeds an atomic.Int64 by value, so it should be shared by pointer
// (newBackendMetrics returns *backendMetrics) rather than copied.
type backendMetrics struct {
meter metric.Meter
ticketsFetched metric.Int64Counter
ticketsAssigned metric.Int64Counter
// ticketCount is the observable up-down counter registered as
// "minimatch.backend.tickets.count". Its callback reports the current value
// of ticketCountActive tagged with status=active.
ticketCount metric.Int64ObservableUpDownCounter
fetchTicketsLatency metric.Float64Histogram
matchFunctionLatency metric.Float64Histogram
assignerLatency metric.Float64Histogram
// assignToRedisLatency is recorded in seconds (see recordAssignToRedisLatency).
assignToRedisLatency metric.Float64Histogram

// ticketCountActive holds the most recently recorded number of active
// tickets. It is written by recordTicketCountActive and read by the
// ticketCount callback.
// NOTE(review): presumably atomic because the callback may run on a
// different goroutine than the writer — confirm against the meter provider.
ticketCountActive atomic.Int64
}

func newBackendMetrics(provider metric.MeterProvider) (*backendMetrics, error) {
Expand Down Expand Up @@ -64,15 +70,26 @@ func newBackendMetrics(provider metric.MeterProvider) (*backendMetrics, error) {
if err != nil {
return nil, err
}
return &backendMetrics{
metrics := &backendMetrics{
meter: meter,
ticketsFetched: ticketsFetched,
ticketsAssigned: ticketsAssigned,
fetchTicketsLatency: fetchTicketsLatency,
matchFunctionLatency: matchFunctionLatency,
assignerLatency: assignerLatency,
assignToRedisLatency: assignToRedisLatency,
}, nil
}
ticketCount, err := meter.Int64ObservableUpDownCounter("minimatch.backend.tickets.count",
metric.WithDescription("Total number of tickets. Do not sum this counter, as a single backend counts all tickets."),
metric.WithInt64Callback(func(ctx context.Context, o metric.Int64Observer) error {
o.Observe(metrics.ticketCountActive.Load(), metric.WithAttributes(attributeActiveTicket))
return nil
}))
if err != nil {
return nil, err
}
metrics.ticketCount = ticketCount
return metrics, nil
}

func (m *backendMetrics) recordMatchFunctionLatency(ctx context.Context, seconds float64, matchProfile *pb.MatchProfile) {
Expand All @@ -99,6 +116,10 @@ func (m *backendMetrics) recordAssignToRedisLatency(ctx context.Context, latency
m.assignToRedisLatency.Record(ctx, latency.Seconds())
}

// recordTicketCountActive stores count as the latest number of active tickets.
//
// Nothing is emitted at call time: the value is only kept in
// ticketCountActive and is reported when the callback of the
// "minimatch.backend.tickets.count" instrument next loads it.
// ctx is currently unused.
func (m *backendMetrics) recordTicketCountActive(ctx context.Context, count int64) {
m.ticketCountActive.Store(count)
}

type matchFunctionWithMetrics struct {
mmf MatchFunction
metrics *backendMetrics
Expand Down
2 changes: 1 addition & 1 deletion pkg/statestore/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func (s *RedisStore) GetAssignment(ctx context.Context, ticketID string) (*pb.As
return s.getAssignment(ctx, redis, ticketID)
}

// The ActiveTicketIDs may still contain the ID of a ticket that was deleted by TTL.
// GetActiveTicketIDs may also return the IDs of tickets that have already been deleted by TTL.
// This is because the ticket index and Ticket data are stored in separate keys.
// The next `GetTicket` or `GetTickets` call will resolve this inconsistency.
func (s *RedisStore) GetActiveTicketIDs(ctx context.Context) ([]string, error) {
Expand Down

0 comments on commit 0e763d2

Please sign in to comment.