From 34fc2bfde5108956369769352cf99eb75bd54505 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 20 Mar 2024 18:10:06 -0400 Subject: [PATCH] faster faster iandNotRun16 Co-authored-by: Neena Dugar --- arraycontainer.go | 57 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/arraycontainer.go b/arraycontainer.go index 1fa626a6..80fa676e 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -664,25 +664,54 @@ func (ac *arrayContainer) iandNot(a container) container { } func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container { - if len(ac.content) == 0 { + // Fast path: if either the array container or the run container is empty, the result is the array. + if ac.isEmpty() || rc.isEmpty() { // Empty return ac } - - for _, run := range rc.iv { - if run.start > ac.maximum() { - // Since the runs are sorted, we can stop here. (No subsequent runs will - // overlap with the array container.) - break - } - if run.last() < ac.minimum() { - // This run is entirely before the array container. We can skip it. - continue + // Fast path: if the run container is full, the result is empty. + if rc.isFull() { + ac.content = ac.content[:0] + return ac + } + current_run := 0 + // All values in [start_run, end_end] are part of the run + start_run := rc.iv[current_run].start + end_end := start_run + rc.iv[current_run].length + // We are going to read values in the array at index i, and we are + // going to write them at index pos. So we do in-place processing. + // We always have that pos <= i by construction. So we can either + // overwrite a value just read, or a value that was previous read. + pos := 0 + i := 0 + for ; i < len(ac.content); i++ { + if ac.content[i] < start_run { + // the value in the array appears before the run [start_run, end_end] + ac.content[pos] = ac.content[i] + pos++ + } else if ac.content[i] <= end_end { + // nothing to do, the value is in the array but also in the run. + } else { + // We have the value in the array after the run. We cannot tell + // whether we need to keep it or not. So let us move to another run. + if current_run+1 < len(rc.iv) { + current_run++ + start_run = rc.iv[current_run].start + end_end = start_run + rc.iv[current_run].length + i-- // retry with the same i + } else { + // We have exhausted the number of runs. We can keep the rest of the values + // from i to len(ac.content) - 1 inclusively. + break // We are done, the rest of the array will be kept + } } - - out := ac.iremoveRange(int(run.start), int(run.start)+int(run.length)+1).(*arrayContainer) - *ac = *out } + for ; i < len(ac.content); i++ { + ac.content[pos] = ac.content[i] + pos++ + } + // We 'shink' the slice. + ac.content = ac.content[:pos] return ac }