Skip to content

Commit

Permalink
Code cleanup, etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
gamolina committed Oct 23, 2024
1 parent b6d009d commit 733f923
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 101 deletions.
12 changes: 10 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
module github.com/RoaringBitmap/roaring/v2

go 1.14
go 1.22.0

toolchain go1.23.2

require (
github.com/bits-and-blooms/bitset v1.12.0
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0
github.com/mschoch/smat v0.2.0
github.com/stretchr/testify v1.7.0
)

require (
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)
151 changes: 66 additions & 85 deletions roaring64/bsi64.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (b *BSI) GetCardinality() uint64 {

// BitCount returns the number of bits needed to represent values. It is the
// number of bitmap slices held in b.bA minus one, because one slice is
// reserved for the sign bit.
func (b *BSI) BitCount() int {
	return len(b.bA) - 1 // Exclude sign bit
}

// IsBigUInt returns the number of bits needed to represent values.
Expand Down Expand Up @@ -302,7 +302,6 @@ func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int6
return b.CompareBigValue(parallelism, op, big.NewInt(valueOrStart), big.NewInt(end), foundSet)
}


// CompareBigValue compares value.
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
// might be erroneous. The operation parameter indicates the type of comparison to be made.
Expand All @@ -320,64 +319,51 @@ func (b *BSI) CompareBigValue(parallelism int, op Operation, valueOrStart, end *
return parallelExecutor(parallelism, comp, compareValue, foundSet)
}

/*
func twosComplement(val *big.Int, bits int) *big.Int {
if val.Sign() != -1 {
return val
}
inverted := new(big.Int).Not(val)
mask := new(big.Int).Lsh(big.NewInt(1), uint(val.BitLen() + 1))
inverted.And(val, mask.Sub(mask, big.NewInt(1)))
val.Set(inverted)
return val
}
*/

// twosComplement returns the two's complement encoding of num as a
// non-negative big.Int.
//
// A non-negative num is returned as a fresh copy with the same value. A
// negative num is encoded in a field that is max(bitCount, num.BitLen()) bits
// wide, so the result equals 2^width - |num|; bitCount therefore acts as a
// minimum width to which the sign is extended. num itself is never modified
// and the returned value never aliases it.
func twosComplement(num *big.Int, bitCount int) *big.Int {
	if num.Sign() >= 0 {
		return new(big.Int).Set(num)
	}

	// BitLen reports the length of the absolute value, which is the narrowest
	// field that can hold the magnitude; never encode in fewer bits than that.
	width := num.BitLen()
	if bitCount > width {
		width = bitCount
	}

	// For negative num, 2^width + num == 2^width - |num|, which is exactly
	// "invert every bit of |num| within width bits, then add one".
	encoded := new(big.Int).Lsh(big.NewInt(1), uint(width))
	return encoded.Add(encoded, num)
}

func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
Expand All @@ -391,7 +377,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa

startIsNegative := e.valueOrStart.Sign() == -1
endIsNegative := e.end.Sign() == -1

for i := 0; i < len(batch); i++ {
cID := batch[i]
eq1, eq2 := true, true
Expand All @@ -401,10 +387,10 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
compStartValue := e.valueOrStart
compEndValue := e.end
if isNegative != startIsNegative {
compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount() + 1)
compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount()+1)
}
if isNegative != endIsNegative {
compEndValue = twosComplement(e.end, e.bsi.BitCount() + 1)
compEndValue = twosComplement(e.end, e.bsi.BitCount()+1)
}

for ; j >= 0; j-- {
Expand All @@ -427,7 +413,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
break
}
}
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
Expand Down Expand Up @@ -473,8 +459,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
}
}
}
}

}

switch e.op {
case LT:
Expand Down Expand Up @@ -554,25 +539,23 @@ func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.In
}

for val := range resultsChan {
//if (op == MAX && val > minMax) || (op == MIN && val <= minMax) {
if (op == MAX && val.Cmp(minMax) > 0) || (op == MIN && val.Cmp(minMax) <= 0) {
//fmt.Printf("VAL = %s > %s = %d\n", val.Text(10), minMax.Text(10), val.Cmp(minMax))
minMax = val
}
}
return minMax
}

// minMaxSignedInt returns the smallest and largest values representable by a
// two's complement signed integer that is bits wide, i.e. -2^(bits-1) and
// 2^(bits-1)-1, in that order.
//
// bits must be at least 1: the shift amount is computed as uint(bits-1), so a
// smaller argument wraps around to an enormous shift.
func minMaxSignedInt(bits int) (*big.Int, *big.Int) {
	one := big.NewInt(1)

	// 2^(bits-1) is the magnitude of the lower bound.
	limit := new(big.Int).Lsh(one, uint(bits-1))

	// Locals are deliberately not called min/max so the builtins of the same
	// name are not shadowed.
	upper := new(big.Int).Sub(limit, one)
	lower := limit.Neg(limit)

	return lower, upper
}

func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, wg *sync.WaitGroup) {
Expand Down Expand Up @@ -601,18 +584,18 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int,
if isNegative != valueIsNegative {
// convert compValue to twos complement
inverted := new(big.Int).Not(compValue)
mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen()))
inverted.And(inverted, mask.Sub(mask, big.NewInt(1)))
mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen()))
inverted.And(inverted, mask.Sub(mask, big.NewInt(1)))
inverted.Add(inverted, big.NewInt(1))
}

done := false
for ; j >= 0; j-- {
sliceContainsBit := b.bA[j].Contains(cID)
if sliceContainsBit {
bigBit := big.NewInt(1)
bigBit.Lsh(bigBit, uint(j))
cVal.Or(cVal, bigBit)
bigBit := big.NewInt(1)
bigBit.Lsh(bigBit, uint(j))
cVal.Or(cVal, bigBit)
if isNegative {
cVal = negativeTwosComplementToInt(cVal)
}
Expand Down Expand Up @@ -666,7 +649,6 @@ func (b *BSI) Sum(foundSet *Bitmap) (int64, uint64) {
return val.Int64(), count
}


// SumBigValues - Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average). This method will sum arbitrarily large values.
func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) {
Expand All @@ -679,7 +661,7 @@ func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) {
wg.Add(1)
go func(j int) {
defer wg.Done()
resultsChan <- int64(foundSet.AndCardinality(&b.bA[j])<<uint(j))
resultsChan <- int64(foundSet.AndCardinality(&b.bA[j]) << uint(j))
}(i)
}
wg.Wait()
Expand All @@ -689,7 +671,7 @@ func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) {
sum.Add(sum, big.NewInt(val))
}
sum.Sub(sum, big.NewInt(int64(foundSet.AndCardinality(&b.bA[b.BitCount()])<<uint(b.BitCount()))))

return sum, count
}

Expand Down Expand Up @@ -900,13 +882,12 @@ func (b *BSI) BatchEqual(parallelism int, values []int64) *Bitmap {
//convert list of int64 values to big.Int(s)
bigValues := make([]*big.Int, len(values))
for i, v := range values {
bigValues[i] = big.NewInt(v)
bigValues[i] = big.NewInt(v)
}
return b.BatchEqualBig(parallelism, bigValues)
}


// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
// BatchEqualBig returns a bitmap containing the column IDs where the values are contained within the list of values provided.
func (b *BSI) BatchEqualBig(parallelism int, values []*big.Int) *Bitmap {

valMap := make(map[string]struct{}, len(values))
Expand Down Expand Up @@ -970,7 +951,7 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {

newBSI := NewDefaultBSI()
newBSI.bA = make([]Bitmap, b.BitCount() + 1)
newBSI.bA = make([]Bitmap, b.BitCount()+1)
var wg sync.WaitGroup
wg.Add(1)
go func() {
Expand Down Expand Up @@ -1006,7 +987,7 @@ func (b *BSI) Add(other *BSI) {

func (b *BSI) addDigit(foundSet *Bitmap, i int) {

if i >= b.BitCount() + 1 || b.BitCount() == 0 {
if i >= b.BitCount()+1 || b.BitCount() == 0 {
b.bA = append(b.bA, Bitmap{})
}
carry := And(&b.bA[i], foundSet)
Expand Down
28 changes: 14 additions & 14 deletions roaring64/bsi64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ func TestSetAndGetBigValue(t *testing.T) {
bigUUID := big.NewInt(-578664753978847603) // Upper bits
bigUUID.Lsh(bigUUID, 64)
lowBits := big.NewInt(-5190910309365112881) // Lower bits
bigUUID.Add(bigUUID, lowBits) // Lower bits
bigUUID.Add(bigUUID, lowBits) // Lower bits

bsi.SetBigValue(1, bigUUID)
assert.Equal(t, bigUUID.BitLen(), bsi.BitCount())
bv, _ := bsi.GetBigValue(1)
assert.Equal(t, bigUUID, bv)
assert.Equal(t, bigUUID, bv)

// Any code past this point will expect a panic error. This will happen if a large value was set
// with SetBigValue() followed by a call to GetValue() where the set value exceeds 64 bits.
Expand All @@ -59,7 +59,7 @@ func TestSetAndGetBigValue(t *testing.T) {
t.Errorf("The code did not panic")
}
}()
bsi.GetValue(1) // this should panic. If so the test will pass.
bsi.GetValue(1) // this should panic. If so the test will pass.
}

func TestSetAndGetUUIDValue(t *testing.T) {
Expand All @@ -72,7 +72,7 @@ func TestSetAndGetUUIDValue(t *testing.T) {
bsi.SetBigValue(1, bigUUID)
assert.Equal(t, bigUUID.BitLen(), bsi.BitCount())
bv, _ := bsi.GetBigValue(1)
assert.Equal(t, bigUUID, bv)
assert.Equal(t, bigUUID, bv)

newUUID, err := uuid.FromBytes(bv.Bytes())
assert.Nil(t, err)
Expand Down Expand Up @@ -127,16 +127,16 @@ func TestRangeBig(t *testing.T) {
}

start, _ := bsi.GetBigValue(uint64(45)) // starting value at columnID 45
end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55
set := bsi.CompareBigValue(0, RANGE, start, end, nil)
assert.Equal(t, uint64(11), set.GetCardinality())
i := set.Iterator()
for i.HasNext() {
v := i.Next()
assert.GreaterOrEqual(t, uint64(v), uint64(45))
assert.LessOrEqual(t, uint64(v), uint64(55))
}
end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55
set := bsi.CompareBigValue(0, RANGE, start, end, nil)
assert.Equal(t, uint64(11), set.GetCardinality())

i := set.Iterator()
for i.HasNext() {
v := i.Next()
assert.GreaterOrEqual(t, uint64(v), uint64(45))
assert.LessOrEqual(t, uint64(v), uint64(55))
}
assert.Equal(t, 67, bsi.BitCount())
}

Expand Down

0 comments on commit 733f923

Please sign in to comment.