-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
BloomFilter.cs
85 lines (77 loc) · 3.19 KB
/
BloomFilter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
using System;
using System.Collections.Generic;
namespace DataStructures.Probabilistic;
/// <summary>
/// A Bloom filter: a fixed-size bit array that answers set-membership queries probabilistically.
/// A negative answer is definitive; a positive answer may be a false positive.
/// Items are mapped to bit slots using multiples of their <see cref="object.GetHashCode"/> value.
/// </summary>
/// <typeparam name="T">Type of the items tracked by the filter.</typeparam>
public class BloomFilter<T> where T : notnull
{
    private const int BitsPerByte = 8;

    // The single-argument constructor allots this many bits of filter per expected element.
    private const int BitsPerElement = 8;

    private readonly byte[] filter;
    private readonly int numHashes;
    private readonly int sizeBits;

    /// <summary>
    /// Initializes a new instance of the <see cref="BloomFilter{T}"/> class. This constructor will create a Bloom Filter
    /// holding 8 bits per expected element, with the number of hashes that minimizes the error rate for that ratio.
    /// </summary>
    /// <param name="expectedNumElements">Expected number of unique elements that could be added to the filter.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="expectedNumElements"/> is not positive, or is so large that the bit count would overflow an <see cref="int"/>.
    /// </exception>
    public BloomFilter(int expectedNumElements)
    {
        if (expectedNumElements <= 0 || expectedNumElements > int.MaxValue / BitsPerElement)
        {
            throw new ArgumentOutOfRangeException(
                nameof(expectedNumElements),
                expectedNumElements,
                $"Expected number of elements must be between 1 and {int.MaxValue / BitsPerElement}.");
        }

        // Optimal hash count is (bits / elements) * ln 2. The ratio is fixed at BitsPerElement,
        // so the result does not depend on expectedNumElements (it evaluates to 6).
        numHashes = (int)Math.Ceiling(Math.Log(2) * BitsPerElement);
        sizeBits = expectedNumElements * BitsPerElement; // number of bit slots in the filter
        filter = new byte[expectedNumElements]; // one byte (8 bits) per expected element
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="BloomFilter{T}"/> class.
    /// This constructor lets you decide how large you want the filter to be as well as allowing you to specify
    /// how many hashes it will use. Only use if you don't care to optimize false positivity.
    /// </summary>
    /// <param name="sizeBits">Size in bits you want the filter to be.</param>
    /// <param name="numHashes">Number of hash functions to be used.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="sizeBits"/> or <paramref name="numHashes"/> is not positive.
    /// </exception>
    public BloomFilter(int sizeBits, int numHashes)
    {
        if (sizeBits <= 0)
        {
            // A zero-sized filter would make every slot computation divide by zero.
            throw new ArgumentOutOfRangeException(nameof(sizeBits), sizeBits, "Filter size must be positive.");
        }

        if (numHashes <= 0)
        {
            // With no hashes nothing is ever recorded and every search would report a match.
            throw new ArgumentOutOfRangeException(nameof(numHashes), numHashes, "Number of hashes must be positive.");
        }

        filter = new byte[sizeBits / BitsPerByte + 1];
        this.numHashes = numHashes;
        this.sizeBits = sizeBits;
    }

    /// <summary>
    /// Inserts an item into the bloom filter.
    /// </summary>
    /// <param name="item">The item being inserted into the Bloom Filter.</param>
    public void Insert(T item)
    {
        foreach (var slot in GetSlots(item))
        {
            // Set the bit for this slot: byte index is slot / 8, bit index within it is slot % 8.
            filter[slot / BitsPerByte] |= (byte)(1 << (slot % BitsPerByte));
        }
    }

    /// <summary>
    /// Searches the Bloom Filter to determine if the item exists in the Bloom Filter.
    /// </summary>
    /// <param name="item">The item being searched for in the Bloom Filter.</param>
    /// <returns>
    /// true if every slot for the item is set (the item was added, or its slots collide with those of added items);
    /// false if the item has definitely not been added.
    /// </returns>
    public bool Search(T item)
    {
        foreach (var slot in GetSlots(item))
        {
            var mask = 1 << (slot % BitsPerByte); // single-bit mask for the slot within its byte
            if ((filter[slot / BitsPerByte] & mask) == 0)
            {
                // One unset bit is enough to prove the item was never inserted.
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Yields the appropriate slots for the given item.
    /// </summary>
    /// <param name="item">The item to determine the slots for.</param>
    /// <returns>The slots of the filter to flip or check, each in the range [0, sizeBits).</returns>
    private IEnumerable<int> GetSlots(T item)
    {
        var hash = item.GetHashCode();
        for (var i = 0; i < numHashes; i++)
        {
            // The i-th slot is derived from the (i + 1)-th multiple of the hash; the product wraps on overflow.
            var product = unchecked((i + 1) * hash);

            // Widen before taking the absolute value: Math.Abs(int.MinValue) throws OverflowException.
            yield return (int)(Math.Abs((long)product) % sizeBits);
        }
    }
}