From 733f9234ee6e97693e0bf0434416e1d8c611cf25 Mon Sep 17 00:00:00 2001 From: "GUY.MOLINARI" Date: Wed, 23 Oct 2024 17:54:14 +0000 Subject: [PATCH] Code cleanup, etc. --- go.mod | 12 +++- roaring64/bsi64.go | 151 ++++++++++++++++++---------------------- roaring64/bsi64_test.go | 28 ++++---- 3 files changed, 90 insertions(+), 101 deletions(-) diff --git a/go.mod b/go.mod index ac83ee91..ec130a1b 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,18 @@ module github.com/RoaringBitmap/roaring/v2 -go 1.14 +go 1.22.0 + +toolchain go1.23.2 require ( github.com/bits-and-blooms/bitset v1.12.0 - github.com/google/uuid v1.6.0 // indirect + github.com/google/uuid v1.6.0 github.com/mschoch/smat v0.2.0 github.com/stretchr/testify v1.7.0 ) + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index b9e9cc98..255549ab 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -76,7 +76,7 @@ func (b *BSI) GetCardinality() uint64 { // BitCount returns the number of bits needed to represent values. func (b *BSI) BitCount() int { - return len(b.bA) - 1 // Exclude sign bit + return len(b.bA) - 1 // Exclude sign bit } // IsBigUInt returns the number of bits needed to represent values. @@ -302,7 +302,6 @@ func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int6 return b.CompareBigValue(parallelism, op, big.NewInt(valueOrStart), big.NewInt(end), foundSet) } - // CompareBigValue compares value. // Values should be in the range of the BSI (max, min). If the value is outside the range, the result // might erroneous. The operation parameter indicates the type of comparison to be made. @@ -320,64 +319,51 @@ func (b *BSI) CompareBigValue(parallelism int, op Operation, valueOrStart, end * return parallelExecutor(parallelism, comp, compareValue, foundSet) } -/* -func twosComplement(val *big.Int, bits int) *big.Int { - if val.Sign() != -1 { - return val - } - inverted := new(big.Int).Not(val) - mask := new(big.Int).Lsh(big.NewInt(1), uint(val.BitLen() + 1)) - inverted.And(val, mask.Sub(mask, big.NewInt(1))) - val.Set(inverted) - return val -} -*/ - // Returns a twos complement value given a value, the return will be bit extended to 'bits' length // if the value is negative func twosComplement(num *big.Int, bitCount int) *big.Int { - // Check if the number is negative - isNegative := num.Sign() < 0 - - // Get the absolute value if negative - abs := new(big.Int).Abs(num) - - // Convert to binary string - binStr := abs.Text(2) - - // Pad with zeros to the left - if len(binStr) < bitCount { - binStr = fmt.Sprintf("%0*s", bitCount, binStr) - } - - // If negative, calculate two's complement - if isNegative { - // Invert bits - inverted := make([]byte, len(binStr)) - for i := range binStr { - if binStr[i] == '0' { - inverted[i] = '1' - } else { - inverted[i] = '0' - } - } - - // Add 1 - carry := byte(1) - for i := len(inverted) - 1; i >= 0; i-- { - inverted[i] += carry - if inverted[i] == '2' { - inverted[i] = '0' - } else { - break - } - } - binStr = string(inverted) - } - - bigInt := new(big.Int) - _, _ = bigInt.SetString(binStr, 2) - return bigInt + // Check if the number is negative + isNegative := num.Sign() < 0 + + // Get the absolute value if negative + abs := new(big.Int).Abs(num) + + // Convert to binary string + binStr := abs.Text(2) + + // Pad with zeros to the left + if len(binStr) < bitCount { + binStr = fmt.Sprintf("%0*s", bitCount, binStr) + } + + // If negative, calculate two's complement + if isNegative { + // Invert bits + inverted := make([]byte, len(binStr)) + for i := range binStr { + if binStr[i] == '0' { + inverted[i] = '1' + } else { + inverted[i] = '0' + } + } + + // Add 1 + carry := byte(1) + for i := len(inverted) - 1; i >= 0; i-- { + inverted[i] += carry + if inverted[i] == '2' { + inverted[i] = '0' + } else { + break + } + } + binStr = string(inverted) + } + + bigInt := new(big.Int) + _, _ = bigInt.SetString(binStr, 2) + return bigInt } func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) { @@ -391,7 +377,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa startIsNegative := e.valueOrStart.Sign() == -1 endIsNegative := e.end.Sign() == -1 - + for i := 0; i < len(batch); i++ { cID := batch[i] eq1, eq2 := true, true @@ -401,10 +387,10 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa compStartValue := e.valueOrStart compEndValue := e.end if isNegative != startIsNegative { - compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount() + 1) + compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount()+1) } if isNegative != endIsNegative { - compEndValue = twosComplement(e.end, e.bsi.BitCount() + 1) + compEndValue = twosComplement(e.end, e.bsi.BitCount()+1) } for ; j >= 0; j-- { @@ -427,7 +413,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa break } } - } + } } else { // BIT in value is CLEAR if sliceContainsBit { @@ -473,8 +459,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa } } } - } - + } switch e.op { case LT: @@ -554,9 +539,7 @@ func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.In } for val := range resultsChan { - //if (op == MAX && val > minMax) || (op == MIN && val <= minMax) { if (op == MAX && val.Cmp(minMax) > 0) || (op == MIN && val.Cmp(minMax) <= 0) { -//fmt.Printf("VAL = %s > %s = %d\n", val.Text(10), minMax.Text(10), val.Cmp(minMax)) minMax = val } } @@ -564,15 +547,15 @@ func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.In } func minMaxSignedInt(bits int) (*big.Int, *big.Int) { - // Calculate the maximum value - max := new(big.Int).Lsh(big.NewInt(1), uint(bits-1)) - max.Sub(max, big.NewInt(1)) + // Calculate the maximum value + max := new(big.Int).Lsh(big.NewInt(1), uint(bits-1)) + max.Sub(max, big.NewInt(1)) - // Calculate the minimum value - min := new(big.Int).Neg(max) - min.Sub(min, big.NewInt(1)) + // Calculate the minimum value + min := new(big.Int).Neg(max) + min.Sub(min, big.NewInt(1)) - return min, max + return min, max } func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, wg *sync.WaitGroup) { @@ -601,8 +584,8 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, if isNegative != valueIsNegative { // convert compValue to twos complement inverted := new(big.Int).Not(compValue) - mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen())) - inverted.And(inverted, mask.Sub(mask, big.NewInt(1))) + mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen())) + inverted.And(inverted, mask.Sub(mask, big.NewInt(1))) inverted.Add(inverted, big.NewInt(1)) } @@ -610,9 +593,9 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, for ; j >= 0; j-- { sliceContainsBit := b.bA[j].Contains(cID) if sliceContainsBit { - bigBit := big.NewInt(1) - bigBit.Lsh(bigBit, uint(j)) - cVal.Or(cVal, bigBit) + bigBit := big.NewInt(1) + bigBit.Lsh(bigBit, uint(j)) + cVal.Or(cVal, bigBit) if isNegative { cVal = negativeTwosComplementToInt(cVal) } @@ -666,7 +649,6 @@ func (b *BSI) Sum(foundSet *Bitmap) (int64, uint64) { return val.Int64(), count } - // SumBigValues - Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet // is also returned (for calculating the average). This method will sum arbitrarily large values. func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) { @@ -679,7 +661,7 @@ func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) { wg.Add(1) go func(j int) { defer wg.Done() - resultsChan <- int64(foundSet.AndCardinality(&b.bA[j])<= b.BitCount() + 1 || b.BitCount() == 0 { + if i >= b.BitCount()+1 || b.BitCount() == 0 { b.bA = append(b.bA, Bitmap{}) } carry := And(&b.bA[i], foundSet) diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index a04482e4..01a3492c 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -45,12 +45,12 @@ func TestSetAndGetBigValue(t *testing.T) { bigUUID := big.NewInt(-578664753978847603) // Upper bits bigUUID.Lsh(bigUUID, 64) lowBits := big.NewInt(-5190910309365112881) // Lower bits - bigUUID.Add(bigUUID, lowBits) // Lower bits + bigUUID.Add(bigUUID, lowBits) // Lower bits bsi.SetBigValue(1, bigUUID) assert.Equal(t, bigUUID.BitLen(), bsi.BitCount()) bv, _ := bsi.GetBigValue(1) - assert.Equal(t, bigUUID, bv) + assert.Equal(t, bigUUID, bv) // Any code past this point will expect a panic error. This will happen if a large value was set // with SetBigValue() followed by a call to GetValue() where the set value exceeds 64 bits. @@ -59,7 +59,7 @@ func TestSetAndGetBigValue(t *testing.T) { t.Errorf("The code did not panic") } }() - bsi.GetValue(1) // this should panic. If so the test will pass. + bsi.GetValue(1) // this should panic. If so the test will pass. } func TestSetAndGetUUIDValue(t *testing.T) { @@ -72,7 +72,7 @@ func TestSetAndGetUUIDValue(t *testing.T) { bsi.SetBigValue(1, bigUUID) assert.Equal(t, bigUUID.BitLen(), bsi.BitCount()) bv, _ := bsi.GetBigValue(1) - assert.Equal(t, bigUUID, bv) + assert.Equal(t, bigUUID, bv) newUUID, err := uuid.FromBytes(bv.Bytes()) assert.Nil(t, err) @@ -127,16 +127,16 @@ func TestRangeBig(t *testing.T) { } start, _ := bsi.GetBigValue(uint64(45)) // starting value at columnID 45 - end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55 - set := bsi.CompareBigValue(0, RANGE, start, end, nil) - assert.Equal(t, uint64(11), set.GetCardinality()) - - i := set.Iterator() - for i.HasNext() { - v := i.Next() - assert.GreaterOrEqual(t, uint64(v), uint64(45)) - assert.LessOrEqual(t, uint64(v), uint64(55)) - } + end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55 + set := bsi.CompareBigValue(0, RANGE, start, end, nil) + assert.Equal(t, uint64(11), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + v := i.Next() + assert.GreaterOrEqual(t, uint64(v), uint64(45)) + assert.LessOrEqual(t, uint64(v), uint64(55)) + } assert.Equal(t, 67, bsi.BitCount()) }