From 48c2e61eeedf7b814c6adb6eeff99b6cb78ee61d Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Wed, 17 Apr 2024 10:23:12 +0200 Subject: [PATCH] part: Fix edge case in LowerBound() If the tree contained a leaf under a node that had matching prefix to the lowerbound search key, but which was smaller then we hit the "prefix smaller" case which incorrectly returned an empty set of edges to explore when it should've returned the larger set of edges from the parent(s). Reproducer from fuzz test for the bug: node4[[0 0 0 0 0 1]]:(0xc000176910) leaf[[18 226]]: [0 0 0 0 0 1 18 226] -> 70370(0xc00007e4b0) node48[[19]]:(0xc00014f6c0) leaf[[11]]: [0 0 0 0 0 1 19 11] -> 70411(0xc00007e6e0) leaf[[12]]: [0 0 0 0 0 1 19 12] -> 70412(0xc00007e730) leaf[[13]]: [0 0 0 0 0 1 19 13] -> 70413(0xc00007e910) leaf[[14]]: [0 0 0 0 0 1 19 14] -> 70414(0xc00007eaa0) Search key was [... 0 1 18 240 ], which lead the lowerbound search to "leaf[[18 226]]" and to the false conclusion that there is nothing in the tree above the search key, when in fact it should've returned "node48[[19]]" as the edge to explore. Added regression test for this exact reproducer, and edge case tests for lowerbound() and checked that all cases were covered. Removed the "len(key) == 0" branch as that is never taken since we compare to the prefix with "min(len(key), len(prefix)", so if len(key) < len(prefix) we always end up in the "-1" case instead. For ease of debugging in future, PrintTree() now uses %x instead of %v to print keys in hex. --- fuzz_test.go | 11 +-- part/iterator.go | 14 ++-- part/node.go | 12 ++-- part/part_test.go | 167 ++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 181 insertions(+), 23 deletions(-) diff --git a/fuzz_test.go b/fuzz_test.go index 8a7a0ea..1eb458d 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -44,10 +44,11 @@ func newDebugLogger(worker int) *debugLogger { } const ( - numUniqueIDs = 2000 - numUniqueValues = 3000 + numUniqueIDs = 3000 + numUniqueValues = 2000 numWorkers = 20 - numIterations = 5000 + numTrackers = 5 + numIterations = 1000 ) type fuzzObj struct { @@ -514,8 +515,8 @@ func TestDB_Fuzz(t *testing.T) { // Start change trackers to observe changes. stop := make(chan struct{}) var wg2 sync.WaitGroup - wg2.Add(3) - for i := 0; i < 3; i++ { + wg2.Add(numTrackers) + for i := 0; i < numTrackers; i++ { i := i go func() { trackerWorker(i, stop) diff --git a/part/iterator.go b/part/iterator.go index e124f35..f6a7ecd 100644 --- a/part/iterator.go +++ b/part/iterator.go @@ -111,22 +111,20 @@ loop: for { switch bytes.Compare(this.prefix, key[:min(len(key), len(this.prefix))]) { case -1: - // Prefix is smaller, which means there is no node smaller than - // the given lowerbound. - return &Iterator[T]{nil} + // Prefix is smaller, stop here and return an iterator for + // the larger nodes in the parent's. + break loop case 0: if len(this.prefix) == len(key) { // Exact match. edges = append(edges, []*header[T]{this}) break loop - } else if len(key) == 0 { - // Search key exhausted, find the minimum node. - edges = traverseToMin(this, edges) - break loop } - // Prefix matches, keep going. + // Prefix matches the beginning of the key, but more + // remains of the key. Drop the matching part and keep + // going further. key = key[len(this.prefix):] if this.kind() == nodeKind256 { diff --git a/part/node.go b/part/node.go index f88ac04..1b936a4 100644 --- a/part/node.go +++ b/part/node.go @@ -212,24 +212,24 @@ func (n *header[T]) printTree(level int) { var children []*header[T] switch n.kind() { case nodeKindLeaf: - fmt.Printf("leaf[%v]:", n.prefix) + fmt.Printf("leaf[%x]:", n.prefix) case nodeKind4: - fmt.Printf("node4[%v]:", n.prefix) + fmt.Printf("node4[%x]:", n.prefix) children = n.node4().children[:n.size()] case nodeKind16: - fmt.Printf("node16[%v]:", n.prefix) + fmt.Printf("node16[%x]:", n.prefix) children = n.node16().children[:n.size()] case nodeKind48: - fmt.Printf("node48[%v]:", n.prefix) + fmt.Printf("node48[%x]:", n.prefix) children = n.node48().children[:n.size()] case nodeKind256: - fmt.Printf("node256[%v]:", n.prefix) + fmt.Printf("node256[%x]:", n.prefix) children = n.node256().children[:] default: panic("unknown node kind") } if leaf := n.getLeaf(); leaf != nil { - fmt.Printf(" %v -> %v", leaf.key, leaf.value) + fmt.Printf(" %x -> %v", leaf.key, leaf.value) } fmt.Printf("(%p)\n", n) diff --git a/part/part_test.go b/part/part_test.go index dd628e4..6a9d145 100644 --- a/part/part_test.go +++ b/part/part_test.go @@ -5,6 +5,7 @@ package part import ( "encoding/binary" + "fmt" "math/rand" "testing" "time" @@ -58,6 +59,10 @@ func intKey(n uint64) []byte { return binary.BigEndian.AppendUint64(nil, n) } +func int32Key(n uint32) []byte { + return binary.BigEndian.AppendUint32(nil, n) +} + func Test_simple_delete(t *testing.T) { tree := New[uint64]() txn := tree.Txn() @@ -79,10 +84,10 @@ func Test_delete(t *testing.T) { tree := New[uint64]() // Do multiple rounds with the same tree. - for round := 0; round < 10; round++ { + for round := 0; round < 100; round++ { // Use a random amount of keys in random order to exercise different // tree structures each time. - numKeys := 10 + rand.Intn(5000) + numKeys := 10 + rand.Intn(1000) t.Logf("numKeys=%d", numKeys) keys := []uint64{} @@ -178,16 +183,34 @@ func Test_delete(t *testing.T) { assert.Equal(t, num, len(keys)) // Test that lowerbound iteration is ordered and correct - prev = keys[len(keys)/2] - iter = tree.LowerBound(intKey(prev + 1)) + idx := len(keys) / 2 + prev = keys[idx] + num = 0 + start := prev + 1 + iter = tree.LowerBound(intKey(start)) + obs := []uint64{} for { _, v, ok := iter.Next() if !ok { break } + num++ + obs = append(obs, v) require.Greater(t, v, prev) prev = v } + exp := 0 + for _, k := range keys { + if k >= start { + exp++ + } + } + if !assert.Equal(t, exp, num) { + t.Logf("LowerBound from %d failed", start) + t.Logf("observed: %v", obs) + tree.PrintTree() + t.Fatal() + } // Test that prefix iteration is ordered and correct prev = 0 @@ -491,6 +514,142 @@ func Test_lowerbound(t *testing.T) { next(false, 0) } +func Test_lowerbound_edge_cases(t *testing.T) { + tree := New[uint32]() + keys := []uint32{} + ins := func(n uint32) { + _, _, tree = tree.Insert(int32Key(n), n) + keys = append(keys, n) + fmt.Printf("%x:\n", keys) + tree.root.printTree(2) + } + + var iter *Iterator[uint32] + next := func(exOK bool, exVal int) { + t.Helper() + _, v, ok := iter.Next() + assert.Equal(t, exOK, ok) + require.EqualValues(t, exVal, v) + } + + // Empty tree + iter = tree.LowerBound([]byte{}) + next(false, 0) + iter = tree.LowerBound(int32Key(0x1)) + next(false, 0) + + // case 0: Leaf at the root + ins(0x1) + iter = tree.LowerBound([]byte{}) + next(true, 0x1) + next(false, 0) + iter = tree.LowerBound(int32Key(0x1)) + next(true, 0x1) + next(false, 0) + iter = tree.LowerBound(int32Key(0x2)) + next(false, 0) + + // Two leafs, node4 root + ins(0x2) + iter = tree.LowerBound([]byte{}) + next(true, 0x1) + next(true, 0x2) + next(false, 0) + iter = tree.LowerBound(int32Key(0x2)) + next(true, 0x2) + next(false, 0) + iter = tree.LowerBound(int32Key(0x3)) + next(false, 0) + + // Different prefix + ins(0x0101) + iter = tree.LowerBound(int32Key(0x100)) + next(true, 0x101) + next(false, 0) + + // case -1: Matching prefix (0x1??) but only smaller nodes behind it + ins(0x1100) + iter = tree.LowerBound(int32Key(0x102)) + next(true, 0x1100) + next(false, 0) + + // Short search keys + ins(0x010000) + + iter = tree.LowerBound([]byte{1}) + next(false, 0) + + iter = tree.LowerBound([]byte{0, 0}) + next(true, 0x1) + next(true, 0x2) + next(true, 0x0101) + next(true, 0x1100) + next(true, 0x010000) + next(false, 0) + + iter = tree.LowerBound([]byte{0, 1, 0}) + next(true, 0x010000) + next(false, 0) + + // Node256 + fmt.Println("node256:") + for i := 0; i < 50; i++ { // add less than 256 for some holes in node256.children + n := uint32(0x20000 + i) + _, _, tree = tree.Insert(int32Key(n), n) + keys = append(keys, n) + } + tree.PrintTree() + + iter = tree.LowerBound(int32Key(0x20000)) + for i := 0; i < 50; i++ { + n := uint32(0x20000 + i) + _, v, ok := iter.Next() + require.True(t, ok) + require.Equal(t, n, v) + } + _, _, ok := iter.Next() + require.False(t, ok) + + iter = tree.LowerBound([]byte{}) + for i := range keys { + _, v, ok := iter.Next() + require.True(t, ok) + require.Equal(t, keys[i], v) + } + _, _, ok = iter.Next() + require.False(t, ok) + +} + +func Test_lowerbound_regression(t *testing.T) { + // Regression test for bug in lowerbound() where the lowerbound search ended up + // in a smaller node and thought there were no larger nodes in the tree to iterate + // over. + + tree := New[uint64]() + ins := func(n uint64) { _, _, tree = tree.Insert(intKey(uint64(n)), uint64(n)) } + + values := []uint64{ + 70370, // ... 1 18 226 + 70411, // ... 1 19 11 + 70412, + } + + for _, v := range values { + ins(v) + } + + tree.PrintTree() + + iter := tree.LowerBound(intKey(70399)) + i := 1 + for _, obj, ok := iter.Next(); ok; _, obj, ok = iter.Next() { + require.Equal(t, values[i], obj) + i++ + } + require.Equal(t, len(values), i) +} + func Test_iterate(t *testing.T) { sizes := []int{0, 1, 10, 100, 1000, rand.Intn(1000)} for _, size := range sizes {