This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Experiment with different batch criteria
cevian committed Nov 3, 2022
1 parent c348455 commit b1847ce
Showing 5 changed files with 37 additions and 16 deletions.
12 changes: 5 additions & 7 deletions pkg/pgmodel/ingestor/copier.go
@@ -213,14 +213,12 @@ func copierGetBatch(ctx context.Context, batch []readRequest, reservationQ *Rese
     metrics.IngestorPipelineTime.With(labelsCopier).Observe(time.Since(startTime).Seconds())
     span.AddEvent("After sleep")
 
-    batch, _ = reservationQ.PopOntoBatch(batch)
+    var reason string
+    batch, _, reason = reservationQ.PopOntoBatch(batch)
+    metrics.IngestorBatchFlushTotal.With(prometheus.Labels{"type": "metric", "subsystem": "copier", "reason": reason}).Inc()
+
+    span.AddEvent("Flushed due to" + reason)
 
-    if len(batch) == cap(batch) {
-        span.AddEvent("Batch is full")
-        metrics.IngestorBatchFlushTotal.With(prometheus.Labels{"type": "metric", "subsystem": "copier", "reason": "size"}).Inc()
-    } else {
-        metrics.IngestorBatchFlushTotal.With(prometheus.Labels{"type": "metric", "subsystem": "copier", "reason": "timeout"}).Inc()
-    }
     metrics.IngestorBatchRemainingAfterFlushTotal.With(labelsCopier).Observe(float64(reservationQ.Len()))
     span.SetAttributes(attribute.Int("num_batches", len(batch)))
     return batch, true
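Note on the copier-side change above: PopOntoBatch now reports why it stopped filling the batch, so the copier attaches that reason to IngestorBatchFlushTotal instead of deriving "size"/"timeout" from whether the batch filled up. A minimal, self-contained sketch of the caller pattern; the types below are toy stand-ins, not the real ingestor types.

package main

import "fmt"

// readRequest and reservationQueue are toy stand-ins for the real types in
// pkg/pgmodel/ingestor; only the shape of PopOntoBatch matters here.
type readRequest struct{ id int }

type reservationQueue struct{ pending []readRequest }

// PopOntoBatch mirrors the new three-value signature: the filled batch, the
// number of requests popped, and a string naming why popping stopped.
func (q *reservationQueue) PopOntoBatch(batch []readRequest) ([]readRequest, int, string) {
    count := 0
    for len(batch) < cap(batch) && len(q.pending) > 0 {
        batch = append(batch, q.pending[0])
        q.pending = q.pending[1:]
        count++
    }
    reason := "timeout"
    if len(batch) == cap(batch) {
        reason = "size_metrics"
    }
    return batch, count, reason
}

func main() {
    q := &reservationQueue{pending: []readRequest{{1}, {2}, {3}}}
    batch := make([]readRequest, 0, 2)

    // Caller-side pattern from copierGetBatch: the returned reason becomes a
    // metric label and a span event instead of being inferred afterwards.
    batch, n, reason := q.PopOntoBatch(batch)
    fmt.Printf("popped %d requests, batch len %d, flush reason %q\n", n, len(batch), reason)
}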
4 changes: 3 additions & 1 deletion pkg/pgmodel/ingestor/metric_batcher.go
@@ -210,7 +210,7 @@ func sendBatches(firstReq *insertDataRequest, input chan *insertDataRequest, con
             t = time.Time{}
         }
         metrics.IngestorPipelineTime.With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher"}).Observe(time.Since(t).Seconds())
-        reservation.Update(reservationQ, t)
+        reservation.Update(reservationQ, t, len(req.data))
         addSpan.End()
     }
     //This channel in synchronous (no buffering). This provides backpressure
@@ -254,11 +254,13 @@ func sendBatches(firstReq *insertDataRequest, input chan *insertDataRequest, con
     }
 
     numSeries := pending.batch.CountSeries()
+    numSamples, numExemplars := pending.batch.Count()
 
     select {
     //try to send first, if not then keep batching
     case copySender <- copyRequest{pending, info}:
         metrics.IngestorFlushSeries.With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher"}).Observe(float64(numSeries))
+        metrics.IngestorFlushInsertables.With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher"}).Observe(float64(numSamples + numExemplars))
         metrics.IngestorBatchDuration.With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher"}).Observe(time.Since(pending.Start).Seconds())
         if pending.IsFull() {
             metrics.IngestorBatchFlushTotal.With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher", "reason": "size"}).Inc()
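For context on the Update call above: each incoming request now reports how many insertables it carries (len(req.data)), and the new IngestorFlushInsertables observation records samples plus exemplars per flush. Below is a rough sketch of just the per-request accounting, assuming (as reservation.go below does) that the counter is read from another goroutine and is therefore updated atomically; the real reservation also lives in a start-time-ordered queue, which is omitted here, and toyReservation is an illustrative name.

package main

import (
    "fmt"
    "sync/atomic"
    "time"
)

// toyReservation is an illustrative stand-in; the real reservation is also
// re-ordered inside the ReservationQueue when an earlier start time arrives.
type toyReservation struct {
    startTime time.Time
    items     int64 // pending insertables (samples + exemplars)
}

// Update mirrors the new interface shape Update(queue, time, numInsertables):
// it accumulates the request's insertable count for the batching decision.
func (r *toyReservation) Update(t time.Time, numInsertables int) {
    atomic.AddInt64(&r.items, int64(numInsertables))
    if t.Before(r.startTime) {
        r.startTime = t // an earlier request moves the reservation forward
    }
}

func main() {
    res := &toyReservation{startTime: time.Now(), items: 1}

    // The metric batcher would call this once per incoming request,
    // passing len(req.data).
    res.Update(time.Now(), 250)
    res.Update(time.Now(), 500)

    fmt.Println("pending insertables:", atomic.LoadInt64(&res.items)) // 751
}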
2 changes: 1 addition & 1 deletion pkg/pgmodel/ingestor/metric_batcher_test.go
@@ -155,7 +155,7 @@ func TestSendBatches(t *testing.T) {
     go sendBatches(firstReq, nil, nil, &pgmodel.MetricInfo{MetricID: 1, TableName: "test"}, reservationQ)
     resos := make([]readRequest, 0, 1)
     reservationQ.Peek()
-    resos, cnt := reservationQ.PopOntoBatch(resos)
+    resos, cnt, _ := reservationQ.PopOntoBatch(resos)
     require.Equal(t, 1, cnt)
     require.Equal(t, 1, len(resos))
     batch := <-(resos[0].copySender)
32 changes: 26 additions & 6 deletions pkg/pgmodel/ingestor/reservation.go
@@ -3,6 +3,7 @@ package ingestor
 import (
     "container/heap"
     "sync"
+    "sync/atomic"
     "time"
 )
 
@@ -12,14 +13,17 @@ type reservation struct {
 
     lock sync.Mutex
     startTime time.Time
+
+    items int64
 }
 
 func newReservation(cs <-chan copyRequest, startTime time.Time) *reservation {
-    return &reservation{cs, -1, sync.Mutex{}, startTime}
+    return &reservation{cs, -1, sync.Mutex{}, startTime, 1}
 }
 
-func (res *reservation) Update(rq *ReservationQueue, t time.Time) {
+func (res *reservation) Update(rq *ReservationQueue, t time.Time, num_insertables int) {
     rest := res.GetStartTime()
+    atomic.AddInt64(&res.items, int64(num_insertables))
 
     if t.Before(rest) {
         //this should happen rarely
@@ -82,7 +86,7 @@ func (res *reservationQueueInternal) Pop() interface{} {
 }
 
 type Reservation interface {
-    Update(*ReservationQueue, time.Time)
+    Update(*ReservationQueue, time.Time, int)
 }
 
 type ReservationQueue struct {
@@ -151,17 +155,33 @@ func (rq *ReservationQueue) Peek() (time.Time, bool) {
 
 // PopBatch pops from the queue to populate the batch until either batch is full or the queue is empty.
 // never blocks. Returns number of requests pop'ed.
-func (rq *ReservationQueue) PopOntoBatch(batch []readRequest) ([]readRequest, int) {
+func (rq *ReservationQueue) PopOntoBatch(batch []readRequest) ([]readRequest, int, string) {
     rq.lock.Lock()
     defer rq.lock.Unlock()
 
     count := 0
-    for len(batch) < cap(batch) && rq.q.Len() > 0 {
+    items := int64(0)
+    if rq.q.Len() > 0 {
+        items = atomic.LoadInt64(&(*rq.q)[0].items)
+    }
+    total_items := int64(0)
+    for len(batch) < cap(batch) && rq.q.Len() > 0 && (len(batch) == 0 || items+total_items < 20000) {
         res := heap.Pop(rq.q).(*reservation)
         batch = append(batch, readRequest{res.copySender})
         count++
+        total_items += items
+        items = 0
+        if rq.q.Len() > 0 {
+            items = atomic.LoadInt64(&(*rq.q)[0].items)
+        }
     }
+    reason := "timeout"
+    if !(len(batch) < cap(batch)) {
+        reason = "size_metrics"
+    } else if !(len(batch) == 0 || items+total_items < 20000) {
+        reason = "size_samples"
+    }
-    return batch, count
+    return batch, count, reason
 }
 
 func (rq *ReservationQueue) update(res *reservation) {
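The new stopping rule above reads as: keep popping reservations while the batch has free slots and, once at least one reservation has been taken, while the running total of pending insertables stays below 20000; the reported reason is "size_metrics" when the slots run out, "size_samples" when the insertable threshold is hit, and "timeout" when the queue simply drains. A self-contained sketch of just that decision, with the heap, locking, and metrics stripped away (20000 is the hard-coded cutoff from this commit; popOntoBatch here is an illustrative helper, not the repository function):

package main

import "fmt"

// popOntoBatch reproduces the stop condition in isolation: queued holds the
// pending insertable count of each reservation in pop order, and slots plays
// the role of cap(batch).
func popOntoBatch(queued []int64, slots int) (popped int, totalItems int64, reason string) {
    i := 0
    items := int64(0)
    if len(queued) > 0 {
        items = queued[0] // insertables of the next reservation to pop
    }
    for popped < slots && i < len(queued) && (popped == 0 || items+totalItems < 20000) {
        totalItems += items
        popped++
        i++
        items = 0
        if i < len(queued) {
            items = queued[i]
        }
    }
    reason = "timeout"
    if popped == slots {
        reason = "size_metrics"
    } else if !(popped == 0 || items+totalItems < 20000) {
        reason = "size_samples"
    }
    return popped, totalItems, reason
}

func main() {
    // Plenty of slots, but the third reservation would push the total past
    // 20000, so only two are taken.
    fmt.Println(popOntoBatch([]int64{9000, 9000, 9000}, 1000)) // 2 18000 size_samples

    // Only two slots: the batch fills up first.
    fmt.Println(popOntoBatch([]int64{100, 100, 100}, 2)) // 2 200 size_metrics

    // Queue drains before either limit, which is reported as a timeout flush.
    fmt.Println(popOntoBatch([]int64{100, 100}, 1000)) // 2 200 timeout
}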
3 changes: 2 additions & 1 deletion pkg/pgmodel/metrics/ingest.go
@@ -14,7 +14,7 @@ const (
     MetricBatcherChannelCap = 1000
     // FlushSize defines the batch size. It is the maximum number of samples/exemplars per insert batch.
     // This translates to the max array size that we pass into `insert_metric_row`
-    FlushSize = 2000
+    FlushSize = 10000
     MaxInsertStmtPerTxn = 100
 )
 
@@ -279,6 +279,7 @@ func init() {
     IngestorChannelCap,
     IngestorChannelLenBatcher,
     IngestorFlushSeries,
+    IngestorFlushInsertables,
     IngestorInsertsPerBatch,
     IngestorRowsPerBatch,
     IngestorRowsPerInsert,
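The diff only adds IngestorFlushInsertables to the collector registration list; its definition is not part of this commit. A hypothetical definition consistent with how it is used in metric_batcher.go above (a histogram observed with "type" and "subsystem" labels) might look like the following; the metric name, help text, and buckets are illustrative guesses, not the repository's actual code.

package main

import "github.com/prometheus/client_golang/prometheus"

// Hypothetical stand-in for the IngestorFlushInsertables metric registered in
// the diff above; only the label names ("type", "subsystem") are taken from
// the visible usage, everything else is a guess.
var IngestorFlushInsertables = prometheus.NewHistogramVec(
    prometheus.HistogramOpts{
        Name: "ingestor_flush_insertables",
        Help: "Number of samples and exemplars flushed in one metric-batcher batch.",
    },
    []string{"type", "subsystem"},
)

func main() {
    prometheus.MustRegister(IngestorFlushInsertables)

    // Mirrors the call added in metric_batcher.go: observe samples + exemplars
    // for each flushed batch.
    IngestorFlushInsertables.
        With(prometheus.Labels{"type": "metric", "subsystem": "metric_batcher"}).
        Observe(1234)
}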
