From 8955890e0c12e784821126b0aadc70c97ae915a2 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 16 Jan 2025 14:36:14 +0100 Subject: [PATCH] Revert "Merge pull request #15455 from bboreham/compact-cache-symbols" This reverts commit 4dacd7572a831f0ad8b83e785cb4da82be143a97, reversing changes made to 5e124cf4f2b9467e4ae1c679840005e727efd599. --- tsdb/index/index.go | 60 +++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 5878d934a..6a1064b35 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -111,6 +111,12 @@ func newCRC32() hash.Hash32 { return crc32.New(castagnoliTable) } +type symbolCacheEntry struct { + index uint32 + lastValueIndex uint32 + lastValue string +} + type PostingsEncoder func(*encoding.Encbuf, []uint32) error type PostingsDecoder func(encoding.Decbuf) (int, Postings, error) @@ -141,7 +147,7 @@ type Writer struct { symbols *Symbols symbolFile *fileutil.MmapFile lastSymbol string - symbolCache map[string]uint32 // From symbol to index in table. + symbolCache map[string]symbolCacheEntry labelIndexes []labelIndexHashEntry // Label index offsets. labelNames map[string]uint64 // Label names, and their usage. @@ -241,7 +247,7 @@ func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncode buf1: encoding.Encbuf{B: make([]byte, 0, 1<<22)}, buf2: encoding.Encbuf{B: make([]byte, 0, 1<<22)}, - symbolCache: make(map[string]uint32, 1<<16), + symbolCache: make(map[string]symbolCacheEntry, 1<<8), labelNames: make(map[string]uint64, 1<<8), crc32: newCRC32(), postingsEncoder: encoder, @@ -473,16 +479,29 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... w.buf2.PutUvarint(lset.Len()) if err := lset.Validate(func(l labels.Label) error { - nameIndex, ok := w.symbolCache[l.Name] + var err error + cacheEntry, ok := w.symbolCache[l.Name] + nameIndex := cacheEntry.index if !ok { - return fmt.Errorf("symbol entry for %q does not exist", l.Name) + nameIndex, err = w.symbols.ReverseLookup(l.Name) + if err != nil { + return fmt.Errorf("symbol entry for %q does not exist, %w", l.Name, err) + } } w.labelNames[l.Name]++ w.buf2.PutUvarint32(nameIndex) - valueIndex, ok := w.symbolCache[l.Value] - if !ok { - return fmt.Errorf("symbol entry for %q does not exist", l.Value) + valueIndex := cacheEntry.lastValueIndex + if !ok || cacheEntry.lastValue != l.Value { + valueIndex, err = w.symbols.ReverseLookup(l.Value) + if err != nil { + return fmt.Errorf("symbol entry for %q does not exist, %w", l.Value, err) + } + w.symbolCache[l.Name] = symbolCacheEntry{ + index: nameIndex, + lastValueIndex: valueIndex, + lastValue: l.Value, + } } w.buf2.PutUvarint32(valueIndex) return nil @@ -541,7 +560,6 @@ func (w *Writer) AddSymbol(sym string) error { return fmt.Errorf("symbol %q out-of-order", sym) } w.lastSymbol = sym - w.symbolCache[sym] = uint32(w.numSymbols) w.numSymbols++ w.buf1.Reset() w.buf1.PutUvarintStr(sym) @@ -611,10 +629,10 @@ func (w *Writer) writeLabelIndices() error { values := []uint32{} for d.Err() == nil && cnt > 0 { cnt-- - d.Uvarint() // Keycount. - name := d.UvarintBytes() // Label name. - value := d.UvarintBytes() // Label value. - d.Uvarint64() // Offset. + d.Uvarint() // Keycount. + name := d.UvarintBytes() // Label name. + value := yoloString(d.UvarintBytes()) // Label value. + d.Uvarint64() // Offset. if len(name) == 0 { continue // All index is ignored. } @@ -627,9 +645,9 @@ func (w *Writer) writeLabelIndices() error { values = values[:0] } current = name - sid, ok := w.symbolCache[string(value)] - if !ok { - return fmt.Errorf("symbol entry for %q does not exist", string(value)) + sid, err := w.symbols.ReverseLookup(value) + if err != nil { + return err } values = append(values, sid) } @@ -901,9 +919,9 @@ func (w *Writer) writePostingsToTmpFiles() error { nameSymbols := map[uint32]string{} for _, name := range batchNames { - sid, ok := w.symbolCache[name] - if !ok { - return fmt.Errorf("symbol entry for %q does not exist", name) + sid, err := w.symbols.ReverseLookup(name) + if err != nil { + return err } nameSymbols[sid] = name } @@ -940,9 +958,9 @@ func (w *Writer) writePostingsToTmpFiles() error { for _, name := range batchNames { // Write out postings for this label name. - sid, ok := w.symbolCache[name] - if !ok { - return fmt.Errorf("symbol entry for %q does not exist", name) + sid, err := w.symbols.ReverseLookup(name) + if err != nil { + return err } values := make([]uint32, 0, len(postings[sid])) for v := range postings[sid] {