Skip to content

Commit

Permalink
part: Fix edge case in LowerBound()
Browse files Browse the repository at this point in the history
If the tree contained a leaf under a node that had matching
prefix to the lowerbound search key, but which was smaller then
we hit the "prefix smaller" case which incorrectly returned
an empty set of edges to explore when it should've returned
the larger set of edges from the parent(s).

Reproducer from fuzz test for the bug:

  node4[[0 0 0 0 0 1]]:(0xc000176910)
   leaf[[18 226]]: [0 0 0 0 0 1 18 226] -> 70370(0xc00007e4b0)
   node48[[19]]:(0xc00014f6c0)
    leaf[[11]]: [0 0 0 0 0 1 19 11] -> 70411(0xc00007e6e0)
    leaf[[12]]: [0 0 0 0 0 1 19 12] -> 70412(0xc00007e730)
    leaf[[13]]: [0 0 0 0 0 1 19 13] -> 70413(0xc00007e910)
    leaf[[14]]: [0 0 0 0 0 1 19 14] -> 70414(0xc00007eaa0)

Search key was [... 0 1 18 240 ], which lead the lowerbound
search to "leaf[[18 226]]" and to the false conclusion that
there is nothing in the tree above the search key, when in fact
it should've returned "node48[[19]]" as the edge to explore.

Added regression test for this exact reproducer, and edge case
tests for lowerbound() and checked that all cases were covered.

Removed the "len(key) == 0" branch as that is never taken since
we compare to the prefix with "min(len(key), len(prefix)", so if
len(key) < len(prefix) we always end up in the "-1" case instead.

For ease of debugging in future, PrintTree() now uses %x instead
of %v to print keys in hex.
  • Loading branch information
joamaki committed Apr 17, 2024
1 parent 7bac765 commit 48c2e61
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 23 deletions.
11 changes: 6 additions & 5 deletions fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ func newDebugLogger(worker int) *debugLogger {
}

const (
numUniqueIDs = 2000
numUniqueValues = 3000
numUniqueIDs = 3000
numUniqueValues = 2000
numWorkers = 20
numIterations = 5000
numTrackers = 5
numIterations = 1000
)

type fuzzObj struct {
Expand Down Expand Up @@ -514,8 +515,8 @@ func TestDB_Fuzz(t *testing.T) {
// Start change trackers to observe changes.
stop := make(chan struct{})
var wg2 sync.WaitGroup
wg2.Add(3)
for i := 0; i < 3; i++ {
wg2.Add(numTrackers)
for i := 0; i < numTrackers; i++ {
i := i
go func() {
trackerWorker(i, stop)
Expand Down
14 changes: 6 additions & 8 deletions part/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,20 @@ loop:
for {
switch bytes.Compare(this.prefix, key[:min(len(key), len(this.prefix))]) {
case -1:
// Prefix is smaller, which means there is no node smaller than
// the given lowerbound.
return &Iterator[T]{nil}
// Prefix is smaller, stop here and return an iterator for
// the larger nodes in the parent's.
break loop

case 0:
if len(this.prefix) == len(key) {
// Exact match.
edges = append(edges, []*header[T]{this})
break loop
} else if len(key) == 0 {
// Search key exhausted, find the minimum node.
edges = traverseToMin(this, edges)
break loop
}

// Prefix matches, keep going.
// Prefix matches the beginning of the key, but more
// remains of the key. Drop the matching part and keep
// going further.
key = key[len(this.prefix):]

if this.kind() == nodeKind256 {
Expand Down
12 changes: 6 additions & 6 deletions part/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,24 +212,24 @@ func (n *header[T]) printTree(level int) {
var children []*header[T]
switch n.kind() {
case nodeKindLeaf:
fmt.Printf("leaf[%v]:", n.prefix)
fmt.Printf("leaf[%x]:", n.prefix)
case nodeKind4:
fmt.Printf("node4[%v]:", n.prefix)
fmt.Printf("node4[%x]:", n.prefix)
children = n.node4().children[:n.size()]
case nodeKind16:
fmt.Printf("node16[%v]:", n.prefix)
fmt.Printf("node16[%x]:", n.prefix)
children = n.node16().children[:n.size()]
case nodeKind48:
fmt.Printf("node48[%v]:", n.prefix)
fmt.Printf("node48[%x]:", n.prefix)
children = n.node48().children[:n.size()]
case nodeKind256:
fmt.Printf("node256[%v]:", n.prefix)
fmt.Printf("node256[%x]:", n.prefix)
children = n.node256().children[:]
default:
panic("unknown node kind")
}
if leaf := n.getLeaf(); leaf != nil {
fmt.Printf(" %v -> %v", leaf.key, leaf.value)
fmt.Printf(" %x -> %v", leaf.key, leaf.value)
}
fmt.Printf("(%p)\n", n)

Expand Down
167 changes: 163 additions & 4 deletions part/part_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package part

import (
"encoding/binary"
"fmt"
"math/rand"
"testing"
"time"
Expand Down Expand Up @@ -58,6 +59,10 @@ func intKey(n uint64) []byte {
return binary.BigEndian.AppendUint64(nil, n)
}

func int32Key(n uint32) []byte {
return binary.BigEndian.AppendUint32(nil, n)
}

func Test_simple_delete(t *testing.T) {
tree := New[uint64]()
txn := tree.Txn()
Expand All @@ -79,10 +84,10 @@ func Test_delete(t *testing.T) {
tree := New[uint64]()

// Do multiple rounds with the same tree.
for round := 0; round < 10; round++ {
for round := 0; round < 100; round++ {
// Use a random amount of keys in random order to exercise different
// tree structures each time.
numKeys := 10 + rand.Intn(5000)
numKeys := 10 + rand.Intn(1000)
t.Logf("numKeys=%d", numKeys)

keys := []uint64{}
Expand Down Expand Up @@ -178,16 +183,34 @@ func Test_delete(t *testing.T) {
assert.Equal(t, num, len(keys))

// Test that lowerbound iteration is ordered and correct
prev = keys[len(keys)/2]
iter = tree.LowerBound(intKey(prev + 1))
idx := len(keys) / 2
prev = keys[idx]
num = 0
start := prev + 1
iter = tree.LowerBound(intKey(start))
obs := []uint64{}
for {
_, v, ok := iter.Next()
if !ok {
break
}
num++
obs = append(obs, v)
require.Greater(t, v, prev)
prev = v
}
exp := 0
for _, k := range keys {
if k >= start {
exp++
}
}
if !assert.Equal(t, exp, num) {
t.Logf("LowerBound from %d failed", start)
t.Logf("observed: %v", obs)
tree.PrintTree()
t.Fatal()
}

// Test that prefix iteration is ordered and correct
prev = 0
Expand Down Expand Up @@ -491,6 +514,142 @@ func Test_lowerbound(t *testing.T) {
next(false, 0)
}

func Test_lowerbound_edge_cases(t *testing.T) {
tree := New[uint32]()
keys := []uint32{}
ins := func(n uint32) {
_, _, tree = tree.Insert(int32Key(n), n)
keys = append(keys, n)
fmt.Printf("%x:\n", keys)
tree.root.printTree(2)
}

var iter *Iterator[uint32]
next := func(exOK bool, exVal int) {
t.Helper()
_, v, ok := iter.Next()
assert.Equal(t, exOK, ok)
require.EqualValues(t, exVal, v)
}

// Empty tree
iter = tree.LowerBound([]byte{})
next(false, 0)
iter = tree.LowerBound(int32Key(0x1))
next(false, 0)

// case 0: Leaf at the root
ins(0x1)
iter = tree.LowerBound([]byte{})
next(true, 0x1)
next(false, 0)
iter = tree.LowerBound(int32Key(0x1))
next(true, 0x1)
next(false, 0)
iter = tree.LowerBound(int32Key(0x2))
next(false, 0)

// Two leafs, node4 root
ins(0x2)
iter = tree.LowerBound([]byte{})
next(true, 0x1)
next(true, 0x2)
next(false, 0)
iter = tree.LowerBound(int32Key(0x2))
next(true, 0x2)
next(false, 0)
iter = tree.LowerBound(int32Key(0x3))
next(false, 0)

// Different prefix
ins(0x0101)
iter = tree.LowerBound(int32Key(0x100))
next(true, 0x101)
next(false, 0)

// case -1: Matching prefix (0x1??) but only smaller nodes behind it
ins(0x1100)
iter = tree.LowerBound(int32Key(0x102))
next(true, 0x1100)
next(false, 0)

// Short search keys
ins(0x010000)

iter = tree.LowerBound([]byte{1})
next(false, 0)

iter = tree.LowerBound([]byte{0, 0})
next(true, 0x1)
next(true, 0x2)
next(true, 0x0101)
next(true, 0x1100)
next(true, 0x010000)
next(false, 0)

iter = tree.LowerBound([]byte{0, 1, 0})
next(true, 0x010000)
next(false, 0)

// Node256
fmt.Println("node256:")
for i := 0; i < 50; i++ { // add less than 256 for some holes in node256.children
n := uint32(0x20000 + i)
_, _, tree = tree.Insert(int32Key(n), n)
keys = append(keys, n)
}
tree.PrintTree()

iter = tree.LowerBound(int32Key(0x20000))
for i := 0; i < 50; i++ {
n := uint32(0x20000 + i)
_, v, ok := iter.Next()
require.True(t, ok)
require.Equal(t, n, v)
}
_, _, ok := iter.Next()
require.False(t, ok)

iter = tree.LowerBound([]byte{})
for i := range keys {
_, v, ok := iter.Next()
require.True(t, ok)
require.Equal(t, keys[i], v)
}
_, _, ok = iter.Next()
require.False(t, ok)

}

func Test_lowerbound_regression(t *testing.T) {
// Regression test for bug in lowerbound() where the lowerbound search ended up
// in a smaller node and thought there were no larger nodes in the tree to iterate
// over.

tree := New[uint64]()
ins := func(n uint64) { _, _, tree = tree.Insert(intKey(uint64(n)), uint64(n)) }

values := []uint64{
70370, // ... 1 18 226
70411, // ... 1 19 11
70412,
}

for _, v := range values {
ins(v)
}

tree.PrintTree()

iter := tree.LowerBound(intKey(70399))
i := 1
for _, obj, ok := iter.Next(); ok; _, obj, ok = iter.Next() {
require.Equal(t, values[i], obj)
i++
}
require.Equal(t, len(values), i)
}

func Test_iterate(t *testing.T) {
sizes := []int{0, 1, 10, 100, 1000, rand.Intn(1000)}
for _, size := range sizes {
Expand Down

0 comments on commit 48c2e61

Please sign in to comment.