Skip to content

Commit

Permalink
Merge pull request #470 from RoaringBitmap/issue469
Browse files Browse the repository at this point in the history
fixes Issue 469 (various Validate bugs)
  • Loading branch information
lemire authored Feb 4, 2025
2 parents 461d7ec + e5a4a6f commit a21b4b2
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 40 deletions.
3 changes: 3 additions & 0 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -2133,6 +2133,9 @@ func (rb *Bitmap) Stats() Statistics {
return stats
}

// Validate checks if the bitmap is internally consistent.
// You may call it after deserialization to check that the bitmap is valid.
// This function returns an error if the bitmap is invalid, nil otherwise.
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}
4 changes: 2 additions & 2 deletions roaring64/bsi64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ func TestSetAndGetBigTimestamp(t *testing.T) {
// Recover and check the known timestamp
bv, _ := bsi.GetBigValue(1)
seconds, nanoseconds := bigIntToSecondsAndNanos(bv)
ts := time.Unix(seconds, int64(nanoseconds))
assert.Equal(t, "3024-10-23T16:55:46.763295273Z", ts.Format(time.RFC3339Nano))
assert.Equal(t, seconds, int64(33286611346))
assert.Equal(t, nanoseconds, int32(763295273))
assert.Equal(t, 67, bsi.BitCount())
}

Expand Down
2 changes: 0 additions & 2 deletions roaring64/roaring64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2015,8 +2015,6 @@ func Test32As64(t *testing.T) {
func TestRoaringArray64Validation(t *testing.T) {
a := roaringArray64{}

assert.ErrorIs(t, a.validate(), ErrEmptyKeys)

a.keys = append(a.keys, uint32(3), uint32(1))
assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
a.clear()
Expand Down
9 changes: 3 additions & 6 deletions roaring64/roaringarray64.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ type roaringArray64 struct {
}

var (
ErrEmptyKeys = errors.New("keys were empty")
ErrKeySortOrder = errors.New("keys were out of order")
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)
Expand Down Expand Up @@ -437,10 +436,6 @@ func (ra *roaringArray64) checkKeysSorted() bool {
// validate checks the referential integrity
// ensures len(keys) == len(containers), recurses and checks each container type
func (ra *roaringArray64) validate() error {
if len(ra.keys) == 0 {
return ErrEmptyKeys
}

if !ra.checkKeysSorted() {
return ErrKeySortOrder
}
Expand All @@ -454,11 +449,13 @@ func (ra *roaringArray64) validate() error {
}

for _, maps := range ra.containers {

err := maps.Validate()
if err != nil {
return err
}
if maps.IsEmpty() {
return errors.New("empty container")
}
}

return nil
Expand Down
64 changes: 58 additions & 6 deletions roaring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2732,8 +2732,6 @@ func TestFromBitSet(t *testing.T) {
func TestRoaringArrayValidation(t *testing.T) {
a := newRoaringArray()

assert.ErrorIs(t, a.validate(), ErrEmptyKeys)

a.keys = append(a.keys, uint16(3), uint16(1))
assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
a.clear()
Expand All @@ -2744,7 +2742,7 @@ func TestRoaringArrayValidation(t *testing.T) {
a.containers = append(a.containers, &runContainer16{}, &runContainer16{}, &runContainer16{})
assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)
assert.Errorf(t, a.validate(), "zero intervals")
assert.ErrorIs(t, a.validate(), ErrRunIntervalsEmpty)
}

func TestBitMapValidation(t *testing.T) {
Expand Down Expand Up @@ -2805,11 +2803,9 @@ func TestBitMapValidationFromDeserialization(t *testing.T) {
bm.AddRange(100, 110)
},
corruptor: func(s []byte) {
// 13 is the length of the run
// Setting to zero causes an invalid run
s[13] = 0
},
err: ErrRunIntervalLength,
err: ErrRunIntervalSize,
},
{
name: "Creates Interval Overlap",
Expand Down Expand Up @@ -3310,3 +3306,59 @@ func TestIssue467CaseLarge(t *testing.T) {
b.RunOptimize()
require.NoError(t, b.Validate())
}

func TestValidateEmpty(t *testing.T) {
require.NoError(t, New().Validate())
}

func TestValidate469(t *testing.T) {
b := New()
b.RemoveRange(0, 180)
b.AddRange(0, 180)
require.NoError(t, b.Validate())
b.RemoveRange(180, 217)
b.AddRange(180, 217)
require.NoError(t, b.Validate())
b.RemoveRange(217, 2394)
b.RemoveRange(2394, 2427)
b.AddRange(2394, 2427)
require.NoError(t, b.Validate())
b.RemoveRange(2427, 2428)
b.AddRange(2427, 2428)
require.NoError(t, b.Validate())
b.RemoveRange(2428, 3345)
require.NoError(t, b.Validate())
b.RemoveRange(3345, 3346)
require.NoError(t, b.Validate())
b.RemoveRange(3346, 3597)
require.NoError(t, b.Validate())
b.RemoveRange(3597, 3815)
require.NoError(t, b.Validate())
b.RemoveRange(3815, 3816)
require.NoError(t, b.Validate())
b.AddRange(3815, 3816)
require.NoError(t, b.Validate())
b.RemoveRange(3816, 3856)
b.RemoveRange(3856, 4067)
b.RemoveRange(4067, 4069)
b.RemoveRange(4069, 4071)
b.RemoveRange(4071, 4095)
b.RemoveRange(4095, 4096)
require.NoError(t, b.Validate())
b.RunOptimize()
require.False(t, b.IsEmpty())
require.NoError(t, b.Validate())
}

func TestValidateFromV1(t *testing.T) {
v1 := New()
for i := 0; i <= 2; i++ {
v1.Add(uint32(i))
}
v1.RunOptimize()
b, err := v1.MarshalBinary()
require.NoError(t, err)
v2 := New()
require.NoError(t, v2.UnmarshalBinary(b))
require.NoError(t, v2.Validate())
}
5 changes: 0 additions & 5 deletions roaringarray.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ const (
)

var (
ErrEmptyKeys = errors.New("keys were empty")
ErrKeySortOrder = errors.New("keys were out of order")
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)
Expand Down Expand Up @@ -798,10 +797,6 @@ func (ra *roaringArray) checkKeysSorted() bool {
// validate checks the referential integrity
// ensures len(keys) == len(containers), recurses and checks each container type
func (ra *roaringArray) validate() error {
if len(ra.keys) == 0 {
return ErrEmptyKeys
}

if !ra.checkKeysSorted() {
return ErrKeySortOrder
}
Expand Down
31 changes: 17 additions & 14 deletions runcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ type interval16 struct {
var (
ErrRunIntervalsEmpty = errors.New("run contained no interval")
ErrRunNonSorted = errors.New("runs were not sorted")
ErrRunIntervalLength = errors.New("interval had zero length")
ErrRunIntervalEqual = errors.New("intervals were equal")
ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
ErrRunIntervalSize = errors.New("too many intervals relative to data")
Expand Down Expand Up @@ -2757,10 +2756,9 @@ func (rc *runContainer16) validate() error {

intervalsSum := 0
for outeridx := range rc.iv {

if rc.iv[outeridx].length == 0 {
return ErrRunIntervalLength
}
// The length being stored is the actual length - 1.
// So we need to add 1 to get the actual length.
// It is not possible to have a run with length 0.

outerInterval := rc.iv[outeridx]

Expand Down Expand Up @@ -2794,15 +2792,20 @@ func (rc *runContainer16) validate() error {
check that the number of runs < (number of distinct values) / 2
(otherwise you could use an array container)
*/
if MaxIntervalsSum <= intervalsSum {
if !(len(rc.iv) < MaxNumIntervals) {
return ErrRunIntervalSize
}
} else {
if !(len(rc.iv) < (intervalsSum / 2)) {
return ErrRunIntervalSize
}
}

sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv))
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum)
fmt.Println(sizeAsRunContainer, sizeAsBitmapContainer, sizeAsArrayContainer)
// this is always ok:
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return nil
}
if sizeAsRunContainer >= sizeAsBitmapContainer {
return ErrRunIntervalSize
}
if sizeAsRunContainer >= sizeAsArrayContainer {
return ErrRunIntervalSize
}
return nil
}
3 changes: 1 addition & 2 deletions runcontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2588,13 +2588,12 @@ func TestIntervalValidationFailing(t *testing.T) {
start := -4
for i := 0; i < MaxNumIntervals; i++ {
start += 4
end := start + 2
end := start + 1
a := newInterval16Range(uint16(start), uint16(end))
rc.iv = append(rc.iv, a)

}
assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize)

// too many small runs, use array
rc = &runContainer16{}
start = -3
Expand Down
4 changes: 1 addition & 3 deletions serialization_frozen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,9 @@ func TestBitMapValidationFromFrozen(t *testing.T) {
bm.AddRange(100, 110)
},
corruptor: func(s []byte) {
// 13 is the length of the run
// Setting to zero causes an invalid run
s[2] = 0
},
err: ErrRunIntervalLength,
err: ErrRunIntervalSize,
},
{
name: "Creates Interval Overlap",
Expand Down

0 comments on commit a21b4b2

Please sign in to comment.