@@ -6,19 +6,37 @@ import (
6
6
"fmt"
7
7
"reflect"
8
8
"strings"
9
+ "sync"
9
10
"time"
10
11
12
+ "github.com/siddontang/loggers"
11
13
"golang.org/x/exp/slices"
12
14
)
13
15
14
16
type chunkerComposite struct {
15
- * coreChunker
17
+ sync.Mutex
18
+ Ti * TableInfo
19
+ chunkSize uint64
20
+ chunkPtrs []Datum // a list of Ptrs for each of the keys.
21
+ chunkKeys []string // all the keys to chunk on (usually all the col names of the PK)
22
+ keyName string // the name of the key we are chunking on
23
+ where string // any additional WHERE conditions.
24
+ finalChunkSent bool
25
+ isOpen bool
16
26
17
- chunkSize uint64
18
- chunkPtrs []Datum // a list of Ptrs for each of the keys.
19
- chunkKeys []string // all the keys to chunk on (usually all the col names of the PK)
20
- keyName string // the name of the key we are chunking on
21
- where string // any additional WHERE conditions.
27
+ // Dynamic Chunking is time based instead of row based.
28
+ // It uses *time* to determine the target chunk size.
29
+ chunkTimingInfo []time.Duration
30
+ ChunkerTarget time.Duration // i.e. 500ms for target
31
+
32
+ // This is used for restore.
33
+ watermark * Chunk
34
+ // Map from lowerbound value of a chunk -> chunk,
35
+ // Used to update the watermark by applying stored chunks,
36
+ // by comparing their lowerBound with current watermark upperBound.
37
+ lowerBoundWatermarkMap map [string ]* Chunk
38
+
39
+ logger loggers.Advanced
22
40
}
23
41
24
42
// Compile-time check that *chunkerComposite satisfies the Chunker interface.
var _ Chunker = (*chunkerComposite)(nil)
@@ -227,6 +245,91 @@ func (t *chunkerComposite) Feedback(chunk *Chunk, d time.Duration) {
227
245
}
228
246
}
229
247
248
+ // GetLowWatermark returns the highest known value that has been safely copied,
249
+ // which (due to parallelism) could be significantly behind the high watermark.
250
+ // The value is discovered via ChunkerFeedback(), and when retrieved from this func
251
+ // can be used to write a checkpoint for restoration.
252
+ func (t * chunkerComposite ) GetLowWatermark () (string , error ) {
253
+ t .Lock ()
254
+ defer t .Unlock ()
255
+ if t .watermark == nil || t .watermark .UpperBound == nil || t .watermark .LowerBound == nil {
256
+ return "" , errors .New ("watermark not yet ready" )
257
+ }
258
+
259
+ return t .watermark .JSON (), nil
260
+ }
261
+
262
+ // isSpecialRestoredChunk is used to test for the first chunk after restore-from-checkpoint.
263
+ // The restored chunk is a really special beast because the lowerbound
264
+ // will be repeated by the first chunk that is applied post restore.
265
+ // This is called under a mutex.
266
+ func (t * chunkerComposite ) isSpecialRestoredChunk (chunk * Chunk ) bool {
267
+ if chunk .LowerBound == nil || chunk .UpperBound == nil || t .watermark == nil || t .watermark .LowerBound == nil || t .watermark .UpperBound == nil {
268
+ return false // restored checkpoints always have both.
269
+ }
270
+ return chunk .LowerBound .comparesTo (t .watermark .LowerBound )
271
+ }
272
+
273
+ // bumpWatermark updates the minimum value that is known to be safely copied,
274
+ // and is called under a mutex.
275
+ // Because of parallelism, it is possible that a chunk is copied out of order,
276
+ // so this func needs to account for that.
277
+ // Basically:
278
+ // - If the chunk does not "align" to the current low watermark, it's stored in a map keyed by its lowerBound valuesString() value.
279
+ // - If it does align, the watermark is bumped to the chunk's max value. Then
280
+ // stored chunk map is checked to see if an existing chunk lowerBound aligns with the new watermark.
281
+ // - If any stored chunk aligns, it is deleted off the map and the watermark is bumped.
282
+ // - This process repeats until there is no more alignment from the stored map *or* the map is empty.
283
+ func (t * chunkerComposite ) bumpWatermark (chunk * Chunk ) {
284
+ if chunk .UpperBound == nil {
285
+ return
286
+ }
287
+ // Check if this is the first chunk or it's the special restored chunk.
288
+ // If so, set the watermark and then go on to applying any stored chunks.
289
+ if (t .watermark == nil && chunk .LowerBound == nil ) || t .isSpecialRestoredChunk (chunk ) {
290
+ t .watermark = chunk
291
+ goto applyStoredChunks
292
+ }
293
+
294
+ // Validate that chunk has lower bound before moving on
295
+ if chunk .LowerBound == nil {
296
+ errMsg := fmt .Sprintf ("coreChunker.bumpWatermark: nil lowerBound value encountered more than once: %v" , chunk )
297
+ t .logger .Fatal (errMsg )
298
+ }
299
+
300
+ // We haven't set the first chunk yet, or it's not aligned with the
301
+ // previous watermark. Store it in the map keyed by its lowerBound, and move on.
302
+
303
+ // We only need to store by lowerBound because, when updating watermark
304
+ // we always compare the upperBound of current watermark to lowerBound of stored chunks.
305
+ // Key can never be nil, because first chunk will not hit this code path and all remaining chunks will have lowerBound.
306
+ if t .watermark == nil || ! t .watermark .UpperBound .comparesTo (chunk .LowerBound ) {
307
+ t .lowerBoundWatermarkMap [chunk .LowerBound .valuesString ()] = chunk
308
+ return
309
+ }
310
+
311
+ // The remaining case is:
312
+ // t.watermark.UpperBound.Value == chunk.LowerBound.Value
313
+ // Replace the current watermark with the chunk.
314
+ t .watermark = chunk
315
+
316
+ applyStoredChunks:
317
+
318
+ // Check the waterMarkMap for any chunks that align with the new watermark.
319
+ // If there are any, bump the watermark and delete from the map.
320
+ // If there are none, we're done.
321
+ for t .waterMarkMapNotEmpty () && t .watermark .UpperBound != nil && t .lowerBoundWatermarkMap [t .watermark .UpperBound .valuesString ()] != nil {
322
+ key := t .watermark .UpperBound .valuesString ()
323
+ nextWatermark := t .lowerBoundWatermarkMap [key ]
324
+ t .watermark = nextWatermark
325
+ delete (t .lowerBoundWatermarkMap , key )
326
+ }
327
+ }
328
+
329
+ func (t * chunkerComposite ) waterMarkMapNotEmpty () bool {
330
+ return t .lowerBoundWatermarkMap != nil && len (t .lowerBoundWatermarkMap ) != 0
331
+ }
332
+
230
333
func (t * chunkerComposite ) open () (err error ) {
231
334
if t .isOpen {
232
335
// This prevents an error where open is re-called
@@ -316,17 +419,3 @@ func (t *chunkerComposite) SetKey(keyName string, where string) error {
316
419
t .where = where
317
420
return nil
318
421
}
319
-
320
- // GetLowWatermark returns the highest known value that has been safely copied,
321
- // which (due to parallelism) could be significantly behind the high watermark.
322
- // The value is discovered via Chunker Feedback(), and when retrieved from this func
323
- // can be used to write a checkpoint for restoration.
324
- func (t * chunkerComposite ) GetLowWatermark () (string , error ) {
325
- t .Lock ()
326
- defer t .Unlock ()
327
- if t .watermark == nil || t .watermark .UpperBound == nil || t .watermark .LowerBound == nil {
328
- return "" , errors .New ("watermark not yet ready" )
329
- }
330
-
331
- return t .watermark .JSON (), nil
332
- }
0 commit comments