117 changes: 117 additions & 0 deletions frac/active_indexer_test.go
@@ -0,0 +1,117 @@
package frac

import (
"bytes"
"os"
"path/filepath"
"sync"
"testing"
"time"

"github.com/stretchr/testify/assert"

"github.com/ozontech/seq-db/cache"
"github.com/ozontech/seq-db/indexer"
"github.com/ozontech/seq-db/metric/stopwatch"
"github.com/ozontech/seq-db/seq"
"github.com/ozontech/seq-db/storage"
"github.com/ozontech/seq-db/tests/common"
"github.com/ozontech/seq-db/tokenizer"
)

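// readFileAllAtOnce loads the whole log file into memory and splits it into lines,
// dropping a trailing empty line produced by a final newline.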
func readFileAllAtOnce(filename string) ([][]byte, error) {
content, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
lines := bytes.Split(content, []byte{'\n'})
if len(lines) > 0 && len(lines[len(lines)-1]) == 0 {
lines = lines[:len(lines)-1]
}
return lines, nil
}

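// splitLogsToBulks packs the lines into readers of at most bulkSize lines each.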
func splitLogsToBulks(data [][]byte, bulkSize int) []func() ([]byte, error) {
funcs := []func() ([]byte, error){}
for len(data) > 0 {
size := min(len(data), bulkSize)
funcs = append(funcs, testBufReader(data[0:size]))
data = data[size:]
}
return funcs
}

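// testBufReader returns a reader that yields one line per call; once exhausted it
// returns nil to signal the end of the bulk and rewinds, so the reader can be reused
// on the next benchmark iteration.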
func testBufReader(data [][]byte) func() ([]byte, error) {
orig := data
return func() ([]byte, error) {
if len(data) == 0 {
data = orig
return nil, nil
}
line := data[0]
data = data[1:]
return line, nil
}
}

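// getTestProcessor builds an indexer.Processor with a minimal mapping and the
// tokenizers exercised by the benchmark.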
func getTestProcessor() *indexer.Processor {
mapping := seq.Mapping{
"clientip": seq.NewSingleType(seq.TokenizerTypeKeyword, "clientip", 1024),
"request": seq.NewSingleType(seq.TokenizerTypeText, "request", 1024),
"status": seq.NewSingleType(seq.TokenizerTypeKeyword, "status", 1024),
"size": seq.NewSingleType(seq.TokenizerTypeKeyword, "size", 1024),
}

tokenizers := map[seq.TokenizerType]tokenizer.Tokenizer{
seq.TokenizerTypeText: tokenizer.NewTextTokenizer(1024, false, true, 8192),
seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(1024, false, true),
seq.TokenizerTypePath: tokenizer.NewPathTokenizer(1024, false, true),
seq.TokenizerTypeExists: tokenizer.NewExistsTokenizer(),
}

return indexer.NewProcessor(mapping, tokenizers, 0, 0, 0)
}

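// BenchmarkIndexer measures active-indexer throughput on pre-compressed metadata
// blocks built from the k8s.logs test data; bulk preparation is excluded from the timer.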
func BenchmarkIndexer(b *testing.B) {
idx := NewActiveIndexer(8, 8)
idx.Start()
defer idx.Stop()

dataDir := filepath.Join(b.TempDir(), "BenchmarkIndexing")
common.RecreateDir(dataDir)

allLogs, err := readFileAllAtOnce(filepath.Join(common.TestDataDir, "k8s.logs"))
assert.NoError(b, err)
readers := splitLogsToBulks(allLogs, 1000)

active := NewActive(
filepath.Join(dataDir, "test"),
idx,
storage.NewReadLimiter(1, nil),
cache.NewCache[[]byte](nil, nil),
cache.NewCache[[]byte](nil, nil),
&Config{},
)

processor := getTestProcessor()

b.Run("indexing", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
bulks := make([][]byte, 0, len(readers))
for _, readNext := range readers {
_, _, meta, _ := processor.ProcessBulk(time.Now(), nil, nil, readNext)
bulks = append(bulks, storage.CompressDocBlock(meta, nil, 3))
}
b.StartTimer()

wg := sync.WaitGroup{}
for _, meta := range bulks {
wg.Add(1)
idx.Index(active, meta, &wg, stopwatch.New())
}
wg.Wait()
}
})
}
43 changes: 15 additions & 28 deletions frac/active_lids.go
@@ -1,29 +1,12 @@
package frac

import (
"cmp"
"math"
"sort"
"slices"
"sync"
)

type queueIDs struct {
lids []uint32
mids []uint64
rids []uint64
}

func (p *queueIDs) Len() int { return len(p.lids) }
func (p *queueIDs) Less(i, j int) bool {
if p.mids[p.lids[i]] == p.mids[p.lids[j]] {
if p.rids[p.lids[i]] == p.rids[p.lids[j]] {
return p.lids[i] > p.lids[j]
}
return p.rids[p.lids[i]] > p.rids[p.lids[j]]
}
return p.mids[p.lids[i]] > p.mids[p.lids[j]]
}
func (p *queueIDs) Swap(i, j int) { p.lids[i], p.lids[j] = p.lids[j], p.lids[i] }

type TokenLIDs struct {
sortedMu sync.Mutex // global merge mutex, making the merge process strictly sequential
sorted []uint32 // slice of actual sorted and merged LIDs of token
@@ -39,16 +22,20 @@ func (tl *TokenLIDs) GetLIDs(mids, rids *UInt64s) []uint32 {
lids := tl.getQueuedLIDs()
if len(lids) != 0 {

midsVals := mids.GetVals()
ridsVals := rids.GetVals()
mids := mids.GetVals()
rids := rids.GetVals()

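// Sort the queued LIDs by descending (mid, rid); ties fall back to descending LID.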
sort.Sort(&queueIDs{
lids: lids,
mids: midsVals,
rids: ridsVals,
slices.SortFunc(lids, func(i, j uint32) int {
if mids[i] == mids[j] {
if rids[i] == rids[j] {
return -cmp.Compare(i, j)
}
return -cmp.Compare(rids[i], rids[j])
}
return -cmp.Compare(mids[i], mids[j])
})

tl.sorted = mergeSorted(tl.sorted, lids, midsVals, ridsVals)
tl.sorted = mergeSorted(tl.sorted, lids, mids, rids)
}

return tl.sorted
@@ -98,7 +85,7 @@ func mergeSorted(right, left []uint32, mids, rids []uint64) []uint32 {
val := uint32(0)
prev := uint32(math.MaxUint32)

cmp := SeqIDCmp{
c := SeqIDCmp{
mid: mids,
rid: rids,
}
@@ -108,7 +95,7 @@ func mergeSorted(right, left []uint32, mids, rids []uint64) []uint32 {
for l != len(left) && r != len(right) {
ri, li := right[r], left[l]

switch cmp.compare(ri, li) {
switch c.compare(ri, li) {
case 0:
val = ri
r++
3 changes: 2 additions & 1 deletion indexer/metrics.go
@@ -1,9 +1,10 @@
package indexer

import (
"github.com/ozontech/seq-db/metric"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/ozontech/seq-db/metric"
)

var (
3 changes: 2 additions & 1 deletion proxy/bulk/ingestor.go
@@ -8,6 +8,8 @@ import (
"sync/atomic"
"time"

"go.uber.org/zap"

"github.com/ozontech/seq-db/bytespool"
"github.com/ozontech/seq-db/consts"
"github.com/ozontech/seq-db/indexer"
@@ -17,7 +19,6 @@ import (
"github.com/ozontech/seq-db/proxy/stores"
"github.com/ozontech/seq-db/seq"
"github.com/ozontech/seq-db/tokenizer"
"go.uber.org/zap"
)

type MappingProvider interface {