117 changes: 117 additions & 0 deletions frac/active_indexer_test.go
@@ -0,0 +1,117 @@
package frac

import (
"bytes"
"os"
"path/filepath"
"sync"
"testing"
"time"

"github.com/stretchr/testify/assert"

"github.com/ozontech/seq-db/cache"
"github.com/ozontech/seq-db/indexer"
"github.com/ozontech/seq-db/metric/stopwatch"
"github.com/ozontech/seq-db/seq"
"github.com/ozontech/seq-db/storage"
"github.com/ozontech/seq-db/tests/common"
"github.com/ozontech/seq-db/tokenizer"
)

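// readFileAllAtOnce loads the whole log file into memory and splits it into lines,
// dropping a trailing empty line produced by a final newline.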
func readFileAllAtOnce(filename string) ([][]byte, error) {
content, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
lines := bytes.Split(content, []byte{'\n'})
if len(lines) > 0 && len(lines[len(lines)-1]) == 0 {
lines = lines[:len(lines)-1]
}
return lines, nil
}

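// splitLogsToBulks packs the lines into readers of at most bulkSize lines each.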
func splitLogsToBulks(data [][]byte, bulkSize int) []func() ([]byte, error) {
funcs := []func() ([]byte, error){}
for len(data) > 0 {
size := min(len(data), bulkSize)
funcs = append(funcs, testBufReader(data[0:size]))
data = data[size:]
}
return funcs
}

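// testBufReader returns a reader that yields one line per call; once exhausted it
// returns nil to signal the end of the bulk and rewinds, so the reader can be reused
// on the next benchmark iteration.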
func testBufReader(data [][]byte) func() ([]byte, error) {
orig := data
return func() ([]byte, error) {
if len(data) == 0 {
data = orig
return nil, nil
}
line := data[0]
data = data[1:]
return line, nil
}
}

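// getTestProcessor builds an indexer.Processor with a minimal mapping and the
// tokenizers exercised by the benchmark.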
func getTestProcessor() *indexer.Processor {
mapping := seq.Mapping{
"clientip": seq.NewSingleType(seq.TokenizerTypeKeyword, "clientip", 1024),
"request": seq.NewSingleType(seq.TokenizerTypeText, "request", 1024),
"status": seq.NewSingleType(seq.TokenizerTypeKeyword, "status", 1024),
"size": seq.NewSingleType(seq.TokenizerTypeKeyword, "size", 1024),
}

tokenizers := map[seq.TokenizerType]tokenizer.Tokenizer{
seq.TokenizerTypeText: tokenizer.NewTextTokenizer(1024, false, true, 8192),
seq.TokenizerTypeKeyword: tokenizer.NewKeywordTokenizer(1024, false, true),
seq.TokenizerTypePath: tokenizer.NewPathTokenizer(1024, false, true),
seq.TokenizerTypeExists: tokenizer.NewExistsTokenizer(),
}

return indexer.NewProcessor(mapping, tokenizers, 0, 0, 0)
}

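// BenchmarkIndexer measures active-indexer throughput on pre-compressed metadata
// blocks built from the k8s.logs test data; bulk preparation is excluded from the timer.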
func BenchmarkIndexer(b *testing.B) {
idx := NewActiveIndexer(8, 8)
idx.Start()
defer idx.Stop()

dataDir := filepath.Join(b.TempDir(), "BenchmarkIndexing")
common.RecreateDir(dataDir)

allLogs, err := readFileAllAtOnce(filepath.Join(common.TestDataDir, "k8s.logs"))
assert.NoError(b, err)
readers := splitLogsToBulks(allLogs, 1000)

active := NewActive(
filepath.Join(dataDir, "test"),
idx,
storage.NewReadLimiter(1, nil),
cache.NewCache[[]byte](nil, nil),
cache.NewCache[[]byte](nil, nil),
&Config{},
)

processor := getTestProcessor()

b.Run("indexing", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
bulks := make([][]byte, 0, len(readers))
for _, readNext := range readers {
_, _, meta, _ := processor.ProcessBulk(time.Now(), nil, nil, readNext)
bulks = append(bulks, storage.CompressDocBlock(meta, nil, 3))
}
b.StartTimer()

wg := sync.WaitGroup{}
for _, meta := range bulks {
wg.Add(1)
idx.Index(active, meta, &wg, stopwatch.New())
}
wg.Wait()
}
})
}
43 changes: 15 additions & 28 deletions frac/active_lids.go
@@ -1,29 +1,12 @@
package frac

import (
"cmp"
"math"
"sort"
"slices"
"sync"
)

type queueIDs struct {
lids []uint32
mids []uint64
rids []uint64
}

func (p *queueIDs) Len() int { return len(p.lids) }
func (p *queueIDs) Less(i, j int) bool {
if p.mids[p.lids[i]] == p.mids[p.lids[j]] {
if p.rids[p.lids[i]] == p.rids[p.lids[j]] {
return p.lids[i] > p.lids[j]
}
return p.rids[p.lids[i]] > p.rids[p.lids[j]]
}
return p.mids[p.lids[i]] > p.mids[p.lids[j]]
}
func (p *queueIDs) Swap(i, j int) { p.lids[i], p.lids[j] = p.lids[j], p.lids[i] }

type TokenLIDs struct {
sortedMu sync.Mutex // global merge mutex, making the merge process strictly sequential
sorted []uint32 // slice of actual sorted and merged LIDs of token
@@ -39,16 +22,20 @@ func (tl *TokenLIDs) GetLIDs(mids, rids *UInt64s) []uint32 {
lids := tl.getQueuedLIDs()
if len(lids) != 0 {

midsVals := mids.GetVals()
ridsVals := rids.GetVals()
mids := mids.GetVals()
rids := rids.GetVals()

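// Sort the queued LIDs by descending (mid, rid); ties fall back to descending LID.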
sort.Sort(&queueIDs{
lids: lids,
mids: midsVals,
rids: ridsVals,
slices.SortFunc(lids, func(i, j uint32) int {
if mids[i] == mids[j] {
if rids[i] == rids[j] {
return -cmp.Compare(i, j)
}
return -cmp.Compare(rids[i], rids[j])
}
return -cmp.Compare(mids[i], mids[j])
})

tl.sorted = mergeSorted(tl.sorted, lids, midsVals, ridsVals)
tl.sorted = mergeSorted(tl.sorted, lids, mids, rids)
}

return tl.sorted
@@ -98,7 +85,7 @@ func mergeSorted(right, left []uint32, mids, rids []uint64) []uint32 {
val := uint32(0)
prev := uint32(math.MaxUint32)

cmp := SeqIDCmp{
c := SeqIDCmp{
mid: mids,
rid: rids,
}
@@ -108,7 +95,7 @@ func mergeSorted(right, left []uint32, mids, rids []uint64) []uint32 {
for l != len(left) && r != len(right) {
ri, li := right[r], left[l]

switch cmp.compare(ri, li) {
switch c.compare(ri, li) {
case 0:
val = ri
r++
3 changes: 2 additions & 1 deletion indexer/metrics.go
@@ -1,9 +1,10 @@
package indexer

import (
"github.com/ozontech/seq-db/metric"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/ozontech/seq-db/metric"
)

var (
3 changes: 2 additions & 1 deletion proxy/bulk/ingestor.go
@@ -8,6 +8,8 @@ import (
"sync/atomic"
"time"

"go.uber.org/zap"

"github.com/ozontech/seq-db/bytespool"
"github.com/ozontech/seq-db/consts"
"github.com/ozontech/seq-db/indexer"
@@ -17,7 +19,6 @@ import (
"github.com/ozontech/seq-db/proxy/stores"
"github.com/ozontech/seq-db/seq"
"github.com/ozontech/seq-db/tokenizer"
"go.uber.org/zap"
)

type MappingProvider interface {