Skip to content

Commit e882fc3

Browse files
authored
feat: allow caller-provided SipHash keys (#30)
* feat: allow caller-provided SipHash keys (#11)

  The SipHash keys were hardcoded as 0xdeadbeaf and 0xfaebdaed. Anyone can read these from the source and craft inputs that hash to the same bit positions, filling the filter faster and raising false positives.

  Add NewWithKeys(k0, k1, ...) so callers can supply their own random keys (e.g. generated once per node). This restores the collision resistance that SipHash is designed to provide.

  - sipHash.go: extract SipHash constants and default keys, read k0/k1 from the Bloom struct instead of using hardcoded values
  - bbloom.go: add k0/k1 fields, add NewWithKeys constructor, persist custom keys in JSON (omitted when using defaults)
  - bbloom_test.go: tests for custom keys, JSON round-trip with custom keys, default keys omitted from JSON
  - doc.go: mention NewWithKeys for untrusted data

* fix: copy SipHash keys in marshal to avoid pointer aliasing

  marshal() was storing pointers to bl.k0/bl.k1 in the export struct. In JSONMarshalTS, json.Marshal dereferences these pointers outside the read lock, creating a potential data race.

* feat: add NewWithBoolsetAndKeys for bitset import with custom keys

  - use it in JSONUnmarshal, requiring both K0 and K1 to be present

* test: add NewWithBoolsetAndKeys coverage

* fix: reject JSON with only one of K0/K1 in JSONUnmarshal

  marshal() always writes both keys or neither, but hand-edited or corrupted JSON could contain only one. Silently falling back to default keys in that case would produce a filter that cannot find its own entries. Error early instead.

* doc: note that custom keys appear in plaintext in JSONMarshal output

  Callers treating keys as secret should know they are included verbatim in the serialized filter.
1 parent 2e2ad1b commit e882fc3

File tree

4 files changed

+259
-10
lines changed

4 files changed

+259
-10
lines changed

bbloom.go

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ var (
5656
ErrInvalidParms = errors.New("one of the parameters was outside of allowed range")
5757
)
5858

59-
// New creates a bloom filter. It accepts exactly two float64 arguments:
59+
// New creates a bloom filter with default SipHash keys. It accepts exactly
60+
// two float64 arguments:
6061
//
6162
// - If the second parameter is < 1 it is treated as a false-positive rate,
6263
// and the filter is sized automatically.
@@ -65,6 +66,10 @@ var (
6566
// - If the second parameter is >= 1 it is treated as the number of hash
6667
// locations, and the first parameter is the bitset size.
6768
// Example: New(650000.0, 7.0) -- 650000-bit filter, 7 hash locations.
69+
//
70+
// The default SipHash keys are publicly known constants. If the filter will
71+
// hold data controlled by untrusted parties, use [NewWithKeys] instead to
72+
// prevent hash-flooding attacks.
6873
func New(params ...float64) (bloomfilter *Bloom, err error) {
6974
var entries, locs uint64
7075
if len(params) == 2 {
@@ -86,11 +91,40 @@ func New(params ...float64) (bloomfilter *Bloom, err error) {
8691
setLocs: locs,
8792
shift: 64 - exponent,
8893
bitset: make([]uint64, size>>6),
94+
k0: defaultK0,
95+
k1: defaultK1,
8996
hashVersion: 1,
9097
}
9198
return bloomfilter, nil
9299
}
93100

101+
// NewWithKeys creates a bloom filter with caller-provided SipHash keys.
102+
//
103+
// The default keys used by [New] are publicly known constants baked into the
104+
// source code. An attacker who knows the keys can craft inputs that all hash
105+
// to the same bit positions, filling the filter faster than normal and raising
106+
// the false-positive rate. This is a concern when the filter holds data
107+
// chosen by untrusted parties (e.g. content-addressed blocks fetched from
108+
// the network).
109+
//
110+
// Providing random, secret keys (e.g. generated once per node from
111+
// crypto/rand) restores SipHash's anti-collision guarantees and makes such
112+
// attacks infeasible.
113+
//
114+
// The params are interpreted the same way as in [New]. Custom keys are
115+
// preserved across [Bloom.JSONMarshal] / [JSONUnmarshal] round-trips.
116+
// Note: custom keys are included in plaintext in the [Bloom.JSONMarshal]
117+
// output, so treat serialized filters accordingly.
118+
func NewWithKeys(k0, k1 uint64, params ...float64) (*Bloom, error) {
119+
bf, err := New(params...)
120+
if err != nil {
121+
return nil, err
122+
}
123+
bf.k0 = k0
124+
bf.k1 = k1
125+
return bf, nil
126+
}
127+
94128
// NewWithBoolset creates a bloom filter from a pre-existing bitset.
95129
// bs is the serialized bitset (big-endian uint64 words) and locs is the
96130
// number of hash locations per entry.
@@ -105,12 +139,24 @@ func NewWithBoolset(bs []byte, locs uint64) (bloomfilter *Bloom) {
105139
return bloomfilter
106140
}
107141

142+
// NewWithBoolsetAndKeys creates a bloom filter from a pre-existing bitset
143+
// with caller-provided SipHash keys. See [NewWithKeys] for why custom keys
144+
// matter and [NewWithBoolset] for how the bitset is interpreted.
145+
func NewWithBoolsetAndKeys(bs []byte, locs, k0, k1 uint64) (bloomfilter *Bloom) {
146+
bloomfilter = NewWithBoolset(bs, locs)
147+
bloomfilter.k0 = k0
148+
bloomfilter.k1 = k1
149+
return bloomfilter
150+
}
151+
108152
// bloomJSONImExport
109153
// Im/Export structure used by JSONMarshal / JSONUnmarshal
110154
type bloomJSONImExport struct {
111155
FilterSet []byte
112156
SetLocs uint64
113-
Version uint8 `json:"Version,omitempty"`
157+
Version uint8 `json:"Version,omitempty"`
158+
K0 *uint64 `json:"K0,omitempty"`
159+
K1 *uint64 `json:"K1,omitempty"`
114160
}
115161

116162
// Bloom is a bloom filter backed by a power-of-two sized bitset.
@@ -125,7 +171,8 @@ type Bloom struct {
125171
shift uint64
126172

127173
content uint64
128-
hashVersion uint8 // 0 = legacy, 1 = l|=1 fix (issue #11)
174+
k0, k1 uint64 // SipHash keys
175+
hashVersion uint8 // 0 = legacy, 1 = l|=1 fix (issue #11)
129176
}
130177

131178
// ElementsAdded returns the number of elements added to the bloom filter.
@@ -256,6 +303,11 @@ func (bl *Bloom) marshal() bloomJSONImExport {
256303
bloomImEx := bloomJSONImExport{}
257304
bloomImEx.SetLocs = uint64(bl.setLocs)
258305
bloomImEx.Version = bl.hashVersion
306+
if bl.k0 != defaultK0 || bl.k1 != defaultK1 {
307+
k0, k1 := bl.k0, bl.k1
308+
bloomImEx.K0 = &k0
309+
bloomImEx.K1 = &k1
310+
}
259311
bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
260312
for i, w := range bl.bitset {
261313
binary.BigEndian.PutUint64(bloomImEx.FilterSet[i<<3:], w)
@@ -294,7 +346,15 @@ func JSONUnmarshal(dbData []byte) (*Bloom, error) {
294346
if err != nil {
295347
return nil, err
296348
}
297-
bf := NewWithBoolset(bloomImEx.FilterSet, bloomImEx.SetLocs)
349+
if (bloomImEx.K0 == nil) != (bloomImEx.K1 == nil) {
350+
return nil, errors.New("both K0 and K1 must be present or both absent")
351+
}
352+
var bf *Bloom
353+
if bloomImEx.K0 != nil && bloomImEx.K1 != nil {
354+
bf = NewWithBoolsetAndKeys(bloomImEx.FilterSet, bloomImEx.SetLocs, *bloomImEx.K0, *bloomImEx.K1)
355+
} else {
356+
bf = NewWithBoolset(bloomImEx.FilterSet, bloomImEx.SetLocs)
357+
}
298358
bf.hashVersion = bloomImEx.Version
299359
return bf, nil
300360
}

bbloom_test.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"math"
66
"os"
77
"strconv"
8+
"strings"
89
"testing"
910
)
1011

@@ -81,6 +82,7 @@ func TestM_JSON(t *testing.T) {
8182
t.Errorf("FAILED !AddIfNotHas = %v; want %v", cnt2, shallBe)
8283
}
8384
}
85+
8486
func TestSipHashLowAlwaysOdd(t *testing.T) {
8587
bf, err := New(float64(1<<20), float64(7))
8688
if err != nil {
@@ -96,6 +98,38 @@ func TestSipHashLowAlwaysOdd(t *testing.T) {
9698
}
9799
}
98100

101+
func TestNewWithKeys(t *testing.T) {
102+
k0 := uint64(0x0123456789abcdef)
103+
k1 := uint64(0xfedcba9876543210)
104+
105+
bf1, err := NewWithKeys(k0, k1, float64(n*10), float64(7))
106+
if err != nil {
107+
t.Fatal(err)
108+
}
109+
bf2, err := New(float64(n*10), float64(7))
110+
if err != nil {
111+
t.Fatal(err)
112+
}
113+
114+
// same entry should hash to different positions with different keys
115+
entry := []byte("test-entry")
116+
l1, h1 := bf1.sipHash(entry)
117+
l2, h2 := bf2.sipHash(entry)
118+
if l1 == l2 && h1 == h2 {
119+
t.Fatal("custom keys produced same hash as default keys")
120+
}
121+
122+
// filter should still work correctly with custom keys
123+
for i := range wordlist1 {
124+
bf1.Add(wordlist1[i])
125+
}
126+
for i := range wordlist1 {
127+
if !bf1.Has(wordlist1[i]) {
128+
t.Fatalf("Has(%q) = false after Add", wordlist1[i])
129+
}
130+
}
131+
}
132+
99133
func TestJSONBackwardCompatV0(t *testing.T) {
100134
// simulate a filter created with the legacy hash (version 0)
101135
bf, err := New(float64(n*10), float64(7))
@@ -129,6 +163,39 @@ func TestJSONBackwardCompatV0(t *testing.T) {
129163
}
130164
}
131165

166+
func TestNewWithKeysJSON(t *testing.T) {
167+
k0 := uint64(0x0123456789abcdef)
168+
k1 := uint64(0xfedcba9876543210)
169+
170+
bf, err := NewWithKeys(k0, k1, float64(n*10), float64(7))
171+
if err != nil {
172+
t.Fatal(err)
173+
}
174+
175+
entries := wordlist1[:1000]
176+
for _, e := range entries {
177+
bf.Add(e)
178+
}
179+
180+
data := bf.JSONMarshal()
181+
182+
bf2, err := JSONUnmarshal(data)
183+
if err != nil {
184+
t.Fatal(err)
185+
}
186+
187+
// keys should be preserved
188+
if bf2.k0 != k0 || bf2.k1 != k1 {
189+
t.Fatalf("keys not preserved: got k0=%x k1=%x, want k0=%x k1=%x", bf2.k0, bf2.k1, k0, k1)
190+
}
191+
192+
for _, e := range entries {
193+
if !bf2.Has(e) {
194+
t.Fatalf("custom-key filter lost entry %q after JSON round-trip", e)
195+
}
196+
}
197+
}
198+
132199
func TestJSONRoundTripV1(t *testing.T) {
133200
bf, err := New(float64(n*10), float64(7))
134201
if err != nil {
@@ -158,6 +225,110 @@ func TestJSONRoundTripV1(t *testing.T) {
158225
}
159226
}
160227

228+
func TestJSONUnmarshalPartialKeys(t *testing.T) {
229+
// Only K0 present, K1 absent -- should error, not silently fall back.
230+
jsonK0Only := []byte(`{"FilterSet":"AAAAAAAAAA==","SetLocs":3,"K0":42}`)
231+
if _, err := JSONUnmarshal(jsonK0Only); err == nil {
232+
t.Fatal("expected error for JSON with K0 but no K1")
233+
}
234+
235+
// Only K1 present, K0 absent.
236+
jsonK1Only := []byte(`{"FilterSet":"AAAAAAAAAA==","SetLocs":3,"K1":99}`)
237+
if _, err := JSONUnmarshal(jsonK1Only); err == nil {
238+
t.Fatal("expected error for JSON with K1 but no K0")
239+
}
240+
}
241+
242+
func TestDefaultKeysOmittedFromJSON(t *testing.T) {
243+
bf, err := New(float64(512), float64(3))
244+
if err != nil {
245+
t.Fatal(err)
246+
}
247+
bf.Add([]byte("test"))
248+
249+
data := bf.JSONMarshal()
250+
s := string(data)
251+
if strings.Contains(s, "K0") || strings.Contains(s, "K1") {
252+
t.Fatalf("default keys should not appear in JSON: %s", s)
253+
}
254+
255+
// custom keys should appear
256+
bf2, err := NewWithKeys(42, 99, float64(512), float64(3))
257+
if err != nil {
258+
t.Fatal(err)
259+
}
260+
bf2.Add([]byte("test"))
261+
262+
data2 := bf2.JSONMarshal()
263+
s2 := string(data2)
264+
if !strings.Contains(s2, "K0") || !strings.Contains(s2, "K1") {
265+
t.Fatalf("custom keys should appear in JSON: %s", s2)
266+
}
267+
}
268+
269+
func TestNewWithBoolsetAndKeys(t *testing.T) {
270+
k0 := uint64(0x0123456789abcdef)
271+
k1 := uint64(0xfedcba9876543210)
272+
entries := wordlist1[:1000]
273+
274+
// Build a reference filter with custom keys and populate it.
275+
ref, err := NewWithKeys(k0, k1, float64(n*10), float64(7))
276+
if err != nil {
277+
t.Fatal(err)
278+
}
279+
for _, e := range entries {
280+
ref.Add(e)
281+
}
282+
283+
// Round-trip the filter through JSON (rawBitset holds the JSON document, not a bare bitset); JSONUnmarshal rebuilds it via NewWithBoolsetAndKeys when both keys are present.
284+
rawBitset := ref.JSONMarshal()
285+
refImport, err := JSONUnmarshal(rawBitset)
286+
if err != nil {
287+
t.Fatal(err)
288+
}
289+
290+
t.Run("keys are stored", func(t *testing.T) {
291+
// NewWithBoolsetAndKeys must propagate k0/k1 into the Bloom struct,
292+
// otherwise all lookups will use the wrong hash positions.
293+
got := NewWithBoolsetAndKeys(make([]byte, 64), 7, k0, k1)
294+
if got.k0 != k0 || got.k1 != k1 {
295+
t.Fatalf("keys not set: got k0=%x k1=%x, want k0=%x k1=%x",
296+
got.k0, got.k1, k0, k1)
297+
}
298+
})
299+
300+
t.Run("entries survive bitset round-trip", func(t *testing.T) {
301+
// A filter rebuilt from the same bitset and keys must recognize
302+
// every entry that was added to the original.
303+
for _, e := range entries {
304+
if !refImport.Has(e) {
305+
t.Fatalf("entry %q lost after round-trip", e)
306+
}
307+
}
308+
})
309+
310+
t.Run("wrong keys miss entries", func(t *testing.T) {
311+
// Unmarshal the custom-key filter, then force default keys.
312+
// At least one lookup must miss, proving the keys actually affect hashing.
313+
wrong, err := JSONUnmarshal(ref.JSONMarshal())
314+
if err != nil {
315+
t.Fatal(err)
316+
}
317+
wrong.k0 = defaultK0
318+
wrong.k1 = defaultK1
319+
320+
misses := 0
321+
for _, e := range entries {
322+
if !wrong.Has(e) {
323+
misses++
324+
}
325+
}
326+
if misses == 0 {
327+
t.Fatal("default keys matched every entry; custom keys had no effect")
328+
}
329+
})
330+
}
331+
161332
func TestFillRatio(t *testing.T) {
162333
bf, err := New(float64(512), float64(7))
163334
if err != nil {

doc.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
// bitset up to the next power of two for fast masking, and provides both
1010
// non-thread-safe and mutex-protected (TS-suffixed) variants of all operations.
1111
//
12+
// By default ([New]) the filter uses publicly known SipHash keys. When the
13+
// filter will hold data controlled by untrusted parties, use [NewWithKeys]
14+
// with random secret keys to prevent hash-flooding attacks.
15+
//
1216
// Filters can be serialized to JSON with [Bloom.JSONMarshal] and restored
1317
// with [JSONUnmarshal].
1418
package bbloom

sipHash.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,28 @@
1010

1111
package bbloom
1212

13-
// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two 64-bit
14-
// parts of 128-bit key: k0 and k1.
13+
// SipHash-2-4 initialization constants.
14+
const (
15+
sipC0 = 0x736f6d6570736575
16+
sipC1 = 0x646f72616e646f6d
17+
sipC2 = 0x6c7967656e657261
18+
sipC3 = 0x7465646279746573
19+
)
20+
21+
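// Default SipHash keys (the original hardcoded values: 0xdeadbeaf, 0xfaebdaed).
// The spelling is intentional: XORed with the SipHash constants above, these reproduce the precomputed v0..v3 literals that sipHash used before, so default-key filters hash exactly as they did.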
22+
const (
23+
defaultK0 = uint64(0xdeadbeaf)
24+
defaultK1 = uint64(0xfaebdaed)
25+
)
26+
27+
// sipHash returns the 64-bit SipHash-2-4 of the given byte slice using
28+
// the bloom filter's k0/k1 keys, split into two parts for double-hashing.
1529
func (bl *Bloom) sipHash(p []byte) (l, h uint64) {
1630
// Initialization.
17-
v0 := uint64(8317987320269560794) // k0 ^ 0x736f6d6570736575
18-
v1 := uint64(7237128889637516672) // k1 ^ 0x646f72616e646f6d
19-
v2 := uint64(7816392314733513934) // k0 ^ 0x6c7967656e657261
20-
v3 := uint64(8387220255325274014) // k1 ^ 0x7465646279746573
31+
v0 := bl.k0 ^ sipC0
32+
v1 := bl.k1 ^ sipC1
33+
v2 := bl.k0 ^ sipC2
34+
v3 := bl.k1 ^ sipC3
2135
t := uint64(len(p)) << 56
2236

2337
// Compression.

0 commit comments

Comments
 (0)