Skip to content

Commit

Permalink
When deserializing a 64-bit roaring bitmap, the code would first try …
Browse files Browse the repository at this point in the history
…to deserialize a 32-bit bitmap. It would usually work, but sometimes it would fail (see #409) because the number of containers would be recognized as a cookie.

This PR just disables this attempt at supporting 32-bit bitmaps loading from 64-bit bitmaps.
lemire committed Jan 23, 2024

Verified

This commit was signed with the committer’s verified signature.
Zaczero Kamil Monicz
1 parent db60103 commit ea47821
Showing 2 changed files with 10 additions and 172 deletions.
84 changes: 10 additions & 74 deletions roaring64/roaring64.go
Original file line number Diff line number Diff line change
@@ -86,23 +86,13 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
// This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually.
func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
stream := internal.NewByteBuffer(data)

cookie, r32, p, err := tryReadFromRoaring32ByteBuffer(rb, stream)
if err != nil {
return p, err
} else if r32 {
return p, nil
}

var sizeBuf [8]byte // Avoid changing the original byte slice.
sizeBuf2, err := stream.Next(4)
sizeBuf := make([]byte, 8)
n, err := stream.Read(sizeBuf)
if err != nil {
return 0, fmt.Errorf("error in bitmap.UnsafeFromBytes: could not read number of containers: %w", err)
return 0, err
}
p += 4
copy(sizeBuf[:], cookie)
copy(sizeBuf[4:], sizeBuf2)
size := binary.LittleEndian.Uint64(sizeBuf[:])
p += int64(n)
size := binary.LittleEndian.Uint64(sizeBuf)

rb.highlowcontainer.resize(0)
if cap(rb.highlowcontainer.keys) >= int(size) {
@@ -138,51 +128,21 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
return p, nil
}

func tryReadFromRoaring32ByteBuffer(rb *Bitmap, stream *internal.ByteBuffer) (cookie []byte, r32 bool, p int64, err error) {
// Verify the first two bytes are a valid MagicNumber.
cookie, err = stream.Next(4)
if err != nil {
return cookie, false, 0, err
}
fileMagic := int(binary.LittleEndian.Uint16(cookie[0:2]))
if fileMagic == serialCookieNoRunContainer || fileMagic == serialCookie {
bm32 := roaring.NewBitmap()
p, err = bm32.ReadFrom(stream, cookie...)
if err != nil {
return
}
// Try reuse the underlying slices.
rb.highlowcontainer.resize(0)
rb.highlowcontainer.keys = append(rb.highlowcontainer.keys, 0)
rb.highlowcontainer.containers = append(rb.highlowcontainer.containers, bm32)
rb.highlowcontainer.needCopyOnWrite = append(rb.highlowcontainer.needCopyOnWrite, false)
return cookie, true, p, nil
}
return
}

// ReadFrom reads a serialized version of this bitmap from stream.
// The format is compatible with other 64-bit RoaringBitmap
// implementations (Java, Go, C++) and it has a specification :
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
cookie, r32, p, err := tryReadFromRoaring32(rb, stream)
if err != nil {
return p, err
} else if r32 {
return p, nil
}
// TODO: Add buffer interning as in base roaring package.

sizeBuf := make([]byte, 4)
sizeBuf := make([]byte, 8)
var n int
fmt.Println("trying to read 8 bytes")
n, err = stream.Read(sizeBuf)
fmt.Println("read: ", n, " bytes")

if n == 0 || err != nil {
return int64(n), fmt.Errorf("error in bitmap.readFrom: could not read number of containers: %s", err)
return int64(n), err
}
p += int64(n)
sizeBuf = append(cookie, sizeBuf...)

size := binary.LittleEndian.Uint64(sizeBuf)
rb.highlowcontainer.resize(0)
if cap(rb.highlowcontainer.keys) >= int(size) {
@@ -219,30 +179,6 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
return p, nil
}

func tryReadFromRoaring32(rb *Bitmap, stream io.Reader) (cookie []byte, r32 bool, p int64, err error) {
// Verify the first two bytes are a valid MagicNumber.
cookie = make([]byte, 4)
size, err := stream.Read(cookie)
if err != nil {
return cookie, false, int64(size), err
}
fileMagic := int(binary.LittleEndian.Uint16(cookie[0:2]))
if fileMagic == serialCookieNoRunContainer || fileMagic == serialCookie {
bm32 := roaring.NewBitmap()
p, err = bm32.ReadFrom(stream, cookie...)
if err != nil {
return
}
// Try reuse the underlying slices.
rb.highlowcontainer.resize(0)
rb.highlowcontainer.keys = append(rb.highlowcontainer.keys, 0)
rb.highlowcontainer.containers = append(rb.highlowcontainer.containers, bm32)
rb.highlowcontainer.needCopyOnWrite = append(rb.highlowcontainer.needCopyOnWrite, false)
return cookie, true, p, nil
}
return
}

// FromBuffer creates a bitmap from its serialized version stored in buffer
// func (rb *Bitmap) FromBuffer(data []byte) (p int64, err error) {
//
98 changes: 0 additions & 98 deletions roaring64/roaring64_test.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
package roaring64

import (
"fmt"
"io/ioutil"
"math"
"math/rand"
"os"
"path/filepath"
"strconv"
"testing"

@@ -1984,97 +1980,3 @@ func IntsEquals(a, b []uint64) bool {
}
return true
}

func Test_tryReadFromRoaring32(t *testing.T) {
r32 := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
bs, err := r32.ToBytes()
if err != nil {
t.Fatal(err)
}
r64 := NewBitmap()
assert.True(t, r64.UnmarshalBinary(bs) == nil)
assert.True(t, r64.Contains(1))
assert.True(t, r64.Contains(2))
assert.True(t, r64.Contains(65535))
assert.True(t, r64.Contains(math.MaxUint32-1))

}

func Test_tryReadFromRoaring32_File(t *testing.T) {
tempDir, err := ioutil.TempDir("./", "testdata")
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
}
defer os.RemoveAll(tempDir)

r32 := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
bs, err := r32.ToBytes()
if err != nil {
t.Fatal(err)
}
name := filepath.Join(tempDir, "r32")
if err := ioutil.WriteFile(name, bs, 0600); err != nil {
t.Fatal(err)
}
file, err := os.Open(name)
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
}
defer file.Close()

r64 := NewBitmap()
r64.ReadFrom(file)
assert.True(t, r64.Contains(1))
assert.True(t, r64.Contains(2))
assert.True(t, r64.Contains(65535))
assert.True(t, r64.Contains(math.MaxUint32-1))
}

func Test_tryReadFromRoaring32WithRoaring64(t *testing.T) {
r64 := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
bs, err := r64.ToBytes()
if err != nil {
t.Fatal(err)
}
nr64 := NewBitmap()
assert.True(t, nr64.UnmarshalBinary(bs) == nil)
assert.True(t, nr64.Contains(1))
assert.True(t, nr64.Contains(65535))
assert.True(t, nr64.Contains(math.MaxUint32))
assert.True(t, nr64.Contains(math.MaxUint64))
}

func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
tempDir, err := ioutil.TempDir("./", "testdata")
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
}
defer os.RemoveAll(tempDir)

r64 := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
bs, err := r64.ToBytes()
if err != nil {
t.Fatal(err)
}

name := filepath.Join(tempDir, "r32")
if err := ioutil.WriteFile(name, bs, 0600); err != nil {
t.Fatal(err)
}
file, err := os.Open(name)
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
}
defer file.Close()

nr64 := NewBitmap()
nr64.ReadFrom(file)
assert.True(t, nr64.Contains(1))
assert.True(t, nr64.Contains(65535))
assert.True(t, nr64.Contains(math.MaxUint32))
assert.True(t, nr64.Contains(math.MaxUint64))
}

0 comments on commit ea47821

Please sign in to comment.