-
Notifications
You must be signed in to change notification settings - Fork 2
/
iterator.go
359 lines (324 loc) · 10 KB
/
iterator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium
package statedb
import (
"bytes"
"fmt"
"iter"
"slices"
"github.com/cilium/statedb/index"
"github.com/cilium/statedb/part"
)
// Collect drains the given sequence and returns the yielded objects as a
// slice, in iteration order. The revision paired with each object is dropped.
// The sequence is consumed in the process.
func Collect[Obj any](seq iter.Seq2[Obj, Revision]) []Obj {
	objs := ToSeq(seq)
	return slices.Collect(objs)
}
// Map returns a sequence that applies fn to every object yielded by seq.
// The revision paired with each object is passed through unchanged and
// fn is only invoked for objects that are actually pulled by the consumer.
func Map[In, Out any](seq iter.Seq2[In, Revision], fn func(In) Out) iter.Seq2[Out, Revision] {
	return func(yield func(Out, Revision) bool) {
		for in, rev := range seq {
			if more := yield(fn(in), rev); !more {
				return
			}
		}
	}
}
// Filter returns a sequence that yields only those objects of seq for which
// keep returns true. Retained objects are yielded in their original order
// together with their original revision.
func Filter[Obj any](seq iter.Seq2[Obj, Revision], keep func(Obj) bool) iter.Seq2[Obj, Revision] {
	return func(yield func(Obj, Revision) bool) {
		for obj, rev := range seq {
			if !keep(obj) {
				continue
			}
			if !yield(obj, rev) {
				return
			}
		}
	}
}
// ToSeq takes a Seq2 and produces a Seq with the first element of the pair.
func ToSeq[A, B any](seq iter.Seq2[A, B]) iter.Seq[A] {
return func(yield func(A) bool) {
for x, _ := range seq {
if !yield(x) {
break
}
}
}
}
// partSeq adapts a part.Iterator over untyped objects into a typed sequence
// of (object, revision) pairs. The data of every object is type-asserted to
// Obj, which panics if the stored data has a different type.
func partSeq[Obj any](iter *part.Iterator[object]) iter.Seq2[Obj, Revision] {
	return func(yield func(Obj, Revision) bool) {
		// Walk a clone so that the captured iterator is never advanced and
		// each run of the sequence starts again from the same position.
		cursor := iter.Clone()
		for _, entry, ok := cursor.Next(); ok; _, entry, ok = cursor.Next() {
			if !yield(entry.data.(Obj), entry.revision) {
				return
			}
		}
	}
}
// nonUniqueSeq returns a sequence of objects for a query against a
// non-unique index.
//
// Keys in a non-unique index are a composite of the secondary key, the
// primary key and the length of the secondary key:
//
//	<secondary>\0<primary><secondary length>
//
// Queries are prefix searches over these composite keys. As the primary key
// may be of any length, the iterator can return entries whose secondary key
// differs from the one searched for, so every entry is checked against the
// length of searchKey. For example, with the search key "aaaa" the iterator
// might return:
//
//	aaaa\0bbb4
//	aaa\0abab3
//	aaaa\0ccc4
//
// of which "aaaa\0bbb4" and "aaaa\0ccc4" are yielded and "aaa\0abab3" is
// skipped.
//
// With prefixSearch false (a List()) the secondary key must have exactly the
// length of searchKey. With prefixSearch true (a Prefix()) it must be at least
// as long as searchKey, and every object is yielded only once even when
// several keys refer to it.
func nonUniqueSeq[Obj any](iter *part.Iterator[object], prefixSearch bool, searchKey []byte) iter.Seq2[Obj, Revision] {
	return func(yield func(Obj, Revision) bool) {
		// Work on a clone so the sequence can be iterated multiple times.
		cursor := iter.Clone()

		// Primary keys already yielded. Only needed when prefix searching, as
		// several keys of a non-unique index may then map to the same object.
		// A List() sees every object only once and skips this bookkeeping,
		// which makes prefix searches on a non-unique index about 20x slower
		// than normal.
		var seen map[string]struct{}
		if prefixSearch {
			seen = make(map[string]struct{})
		}

		for key, entry, ok := cursor.Next(); ok; key, entry, ok = cursor.Next() {
			secondary, primary := decodeNonUniqueKey(key)

			if prefixSearch {
				// Prefix(): the secondary key must be equal to or longer
				// than the search key.
				if len(secondary) < len(searchKey) {
					continue
				}
				// Skip objects that were already yielded via another key.
				if _, dup := seen[string(primary)]; dup {
					continue
				}
				seen[string(primary)] = struct{}{}
			} else if len(secondary) != len(searchKey) {
				// List(): the secondary key length must match exactly.
				continue
			}

			if !yield(entry.data.(Obj), entry.revision) {
				return
			}
		}
	}
}
// nonUniqueLowerBoundSeq returns a sequence of the objects of a non-unique
// index whose secondary key is equal to or greater than searchKey. Every
// object is yielded at most once, even when several keys refer to it.
//
// The returned sequence can be iterated multiple times; every run starts
// from the position of the given iterator.
func nonUniqueLowerBoundSeq[Obj any](iter *part.Iterator[object], searchKey []byte) iter.Seq2[Obj, Revision] {
	return func(yield func(Obj, Revision) bool) {
		// Iterate over a local clone. The captured iterator must neither be
		// advanced nor reassigned: doing so would make later runs of this
		// sequence continue from wherever the previous run stopped, and
		// would be a data race if the sequence were run concurrently.
		it := iter.Clone()

		// Keep track of objects we've already seen as multiple keys in a
		// non-unique index may map to a single object.
		visited := map[string]struct{}{}

		for {
			key, iobj, ok := it.Next()
			if !ok {
				return
			}

			// With a non-unique index we have a composite key
			// <secondary><primary><secondary len>. This means every key needs
			// to be checked to be larger than or equal to the search key.
			// Just seeking to the first one isn't enough as the secondary key
			// length may vary.
			secondary, primary := decodeNonUniqueKey(key)
			if bytes.Compare(secondary, searchKey) < 0 {
				continue
			}

			if _, found := visited[string(primary)]; found {
				continue
			}
			visited[string(primary)] = struct{}{}

			if !yield(iobj.data.(Obj), iobj.revision) {
				return
			}
		}
	}
}
// iterator adapts the "any" object iterator to a typed object.
// Its Next method asserts the data of each returned object to Obj.
type iterator[Obj any] struct {
// iter is the wrapped untyped iterator. Only the object and the boolean
// result of its Next are used; the []byte result is ignored.
iter interface{ Next() ([]byte, object, bool) }
}
// Next pulls the next entry from the wrapped iterator and returns its data
// asserted to Obj together with its revision. When the wrapped iterator has
// no more entries, ok is false and obj and revision are zero values.
// Next panics if the data of an entry is not of type Obj.
func (it *iterator[Obj]) Next() (obj Obj, revision uint64, ok bool) {
	_, entry, found := it.iter.Next()
	if !found {
		return obj, 0, false
	}
	return entry.data.(Obj), entry.revision, true
}
// Iterator is a pull-style iterator over a sequence of objects and their
// revisions.
type Iterator[Obj any] interface {
// Next returns the next object and its revision if ok is true. Otherwise
// it returns zero values, which means that the iteration has finished.
Next() (obj Obj, rev Revision, ok bool)
}
// NewDualIterator returns a DualIterator that merges the objects of the left
// and right iterators. Nothing is pulled from either iterator until Next is
// called on the result.
func NewDualIterator[Obj any](left, right Iterator[Obj]) *DualIterator[Obj] {
	dual := new(DualIterator[Obj])
	dual.left.iter = left
	dual.right.iter = right
	return dual
}
// iterState is one side of a DualIterator: a source iterator together with
// the entry most recently pulled from it that has not been returned yet.
type iterState[Obj any] struct {
// iter is the source iterator. DualIterator.Next sets it to nil once the
// source has reported that it is exhausted.
iter Iterator[Obj]
// obj is the buffered object; meaningful only while ok is true.
obj Obj
// rev is the revision of the buffered object; meaningful only while ok is true.
rev Revision
// ok reports whether obj and rev hold an entry that is still waiting to be
// returned by DualIterator.Next.
ok bool
}
// DualIterator allows iterating over two iterators in revision order.
// Meant to be used for combined iteration of LowerBound(ByRevision)
// and Deleted().
//
// Both sides are expected to yield their objects in increasing revision
// order; DualIterator only compares the heads of the two iterators.
type DualIterator[Obj any] struct {
// left is the side whose objects Next reports with fromLeft set to true.
// It wins when both sides hold an object with the same revision.
left iterState[Obj]
// right is the side whose objects Next reports with fromLeft set to false.
right iterState[Obj]
}
// Next returns the object with the lowest revision among the heads of the
// two iterators. fromLeft reports whether the object came from the left
// iterator; when both heads have the same revision the left one is returned
// first. ok is false once both iterators are exhausted, in which case the
// other results are zero values.
func (it *DualIterator[Obj]) Next() (obj Obj, revision uint64, fromLeft, ok bool) {
	left, right := &it.left, &it.right

	// Refill every side that has no buffered object, left before right.
	// A source that reports exhaustion is dropped so it is not asked again.
	for _, side := range [...]*iterState[Obj]{left, right} {
		if side.ok || side.iter == nil {
			continue
		}
		side.obj, side.rev, side.ok = side.iter.Next()
		if !side.ok {
			side.iter = nil
		}
	}

	// Hand out the buffered object with the lowest revision and mark its
	// slot as empty so that it gets refilled on the next call.
	switch {
	case !left.ok && !right.ok:
		return obj, 0, false, false

	case left.ok && (!right.ok || left.rev <= right.rev):
		left.ok = false
		return left.obj, left.rev, true, true

	case right.ok:
		right.ok = false
		return right.obj, right.rev, false, true

	default:
		panic(fmt.Sprintf("BUG: Unhandled case: %+v", it))
	}
}
// changeIterator holds the state needed to iterate over the updates and
// deletions of a table's objects across successive Next calls.
type changeIterator[Obj any] struct {
// table is the table whose revision index is queried by refresh.
table Table[Obj]
// revision is the revision of the last update yielded by Next. refresh
// queries for updates starting from revision+1.
revision Revision
// deleteRevision is the revision of the last deletion yielded by Next.
// refresh queries for deletions starting from deleteRevision+1.
deleteRevision Revision
// dt provides the deleted objects and is told (via mark) how far the
// deletions have been observed. It is set to nil by close.
dt *deleteTracker[Obj]
// iter is the iterator created by the last refresh. Next resets it to nil
// once it has been fully consumed.
iter *DualIterator[Obj]
// watch is the watch channel of the revision index obtained by the last
// refresh.
watch <-chan struct{}
}
// refresh re-queries the table as seen by txn for changes newer than what
// has been observed so far. It replaces it.iter with an iterator over the
// deletions after it.deleteRevision and the updates after it.revision, and
// replaces it.watch with the watch channel of the revision index.
func (it *changeIterator[Obj]) refresh(txn ReadTxn) {
	// The revision index is looked up directly rather than via
	// indexReadTxn() so that we never refresh from mutated indexes in case
	// [txn] is a WriteTxn. This is important as the WriteTxn may be aborted,
	// in which case the revisions reset back and watermarks bumped from here
	// would be invalid.
	snapshot := txn.getTxn()
	revIndex := snapshot.root[it.table.tablePos()].indexes[RevisionIndexPos]
	revTxn := indexReadTxn{revIndex.tree, revIndex.unique}

	updates := &iterator[Obj]{iter: revTxn.LowerBound(index.Uint64(it.revision + 1))}
	deletes := it.dt.deleted(snapshot, it.deleteRevision+1)

	// Deletions go on the left side so that DualIterator reports them with
	// fromLeft set.
	it.iter = NewDualIterator(deletes, updates)

	// Watching the revision index alone is enough, the graveyard does not
	// need to be watched: any object that is inserted into the graveyard is
	// deleted from the revision index.
	it.watch = revTxn.RootWatch()
}
// Next returns a sequence of the changes that are visible in txn and have
// not been observed yet, together with a watch channel to wait on before
// calling Next again.
//
// If the previously returned sequence was fully consumed and nothing has
// changed since, the sequence is empty and the watch channel is the one of
// the last query. Otherwise the table is re-queried and a closed watch
// channel is returned, which tells the caller to call Next again without
// waiting.
func (it *changeIterator[Obj]) Next(txn ReadTxn) (seq iter.Seq2[Change[Obj], Revision], watch <-chan struct{}) {
	if it.iter == nil {
		// The previous iterator was exhausted. Requery only if the watch
		// channel of the previous query has been closed.
		select {
		case <-it.watch:
			// Closed: there are new changes to pull in.
		default:
			// Still open: nothing new. Hand the channel to the caller so it
			// can wait for new changes.
			seq = func(yield func(Change[Obj], Revision) bool) {}
			return seq, it.watch
		}
	}

	// Refresh regardless of whether the previous iterator was fully consumed
	// in order to pull in new changes. A closed channel keeps being returned
	// until the iterator has been fully consumed. This costs one extra Next()
	// call to obtain a proper watch channel, but makes the API much safer to
	// use when the sequence is only partially consumed.
	it.refresh(txn)

	seq = func(yield func(Change[Obj], Revision) bool) {
		if it.iter == nil {
			return
		}
		for {
			obj, rev, deleted, ok := it.iter.Next()
			if !ok {
				break
			}

			// Record the progress before handing out the change.
			if deleted {
				it.deleteRevision = rev
				it.dt.mark(rev)
			} else {
				it.revision = rev
			}

			if !yield(Change[Obj]{Object: obj, Revision: rev, Deleted: deleted}, rev) {
				// Consumer stopped early: keep it.iter set so that the next
				// call to Next requeries right away.
				return
			}
		}
		// Fully consumed.
		it.iter = nil
	}
	return seq, closedWatchChannel
}
// nextAny is Next with the changes converted to Change[any]. It is for
// implementing the /changes HTTP API where the concrete object type is not
// known. The watch channel is the one returned by Next.
func (it *changeIterator[Obj]) nextAny(txn ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{}) {
	typed, watch := it.Next(txn)

	untyped := func(yield func(Change[any], Revision) bool) {
		for change, rev := range typed {
			anyChange := Change[any]{
				Object:   change.Object,
				Revision: change.Revision,
				Deleted:  change.Deleted,
			}
			if !yield(anyChange, rev) {
				return
			}
		}
	}
	return untyped, watch
}
// close closes the delete tracker, if there still is one, and drops the
// reference to it. Calling close again is a no-op.
func (it *changeIterator[Obj]) close() {
	if it.dt == nil {
		return
	}
	it.dt.close()
	it.dt = nil
}
// anyChangeIterator is the type-erased view of a change iterator: it yields
// changes as Change[any] so that it can be used without knowing the concrete
// object type. It is implemented by *changeIterator.
type anyChangeIterator interface {
// nextAny returns the pending changes as seen by the given transaction and
// a watch channel.
nextAny(ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{})
}