forked from jvirkki/libbloom
-
Notifications
You must be signed in to change notification settings - Fork 1
/
bloom.h
164 lines (141 loc) · 4.57 KB
/
bloom.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*
* Copyright (c) 2012-2017, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
#ifndef _BLOOM_H
#define _BLOOM_H
#ifdef __cplusplus
extern "C" {
#endif
/** ***************************************************************************
* Structure to keep track of one bloom filter. Caller needs to
* allocate this and pass it to the functions below. First call for
* every struct must be to bloom_init().
*
*/
struct bloom
{
// These fields are part of the public interface of this structure.
// Client code may read these values if desired. Client code MUST NOT
// modify any of these.
int entries;
double error;
int bits;
int bytes;
int hashes;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
// on these.
double bpe;
unsigned char * bf;
int ready;
};
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* The filter is initialized with a bit field and number of hash functions
* according to the computations from the wikipedia entry:
* http://en.wikipedia.org/wiki/Bloom_filter
*
* Optimal number of bits is:
* bits = (entries * ln(error)) / ln(2)^2
*
* Optimal number of hash functions is:
* hashes = bpe * ln(2)
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* entries - The expected number of entries which will be inserted.
* Must be at least 1000 (in practice, likely much larger).
* error - Probability of collision (as long as entries are not
* exceeded).
*
* Return:
* -------
* 0 - on success
* 1 - on failure
*
*/
int bloom_init(struct bloom * bloom, int entries, double error);
/** ***************************************************************************
* Deprecated, use bloom_init()
*
*/
int bloom_init_size(struct bloom * bloom, int entries, double error,
unsigned int cache_size);
/** ***************************************************************************
* Check if the given element is in the bloom filter. Remember this may
* return false positive if a collision occured.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to check.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element is not present
* 1 - element is present (or false positive due to collision)
* -1 - bloom not initialized
*
*/
int bloom_check(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter.
* The return code indicates if the element (or a collision) was already in,
* so for the common check+add use case, no need to call check separately.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to add.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element was not present and was added
* 1 - element (or a collision) had already been added previously
* -1 - bloom not initialized
*
*/
int bloom_add(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter, returning the number of bits
* actually added. If the return value is 0 then it means that the item either
* exists or a collision has been encountered
*/
int bloom_add_retbits(struct bloom *bloom, const void *buffer, int len);
/** ***************************************************************************
* Print (to stdout) info about this bloom filter. Debugging aid.
*
*/
void bloom_print(struct bloom * bloom);
/** ***************************************************************************
* Deallocate internal storage.
*
* Upon return, the bloom struct is no longer usable. You may call bloom_init
* again on the same struct to reinitialize it again.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return: none
*
*/
void bloom_free(struct bloom * bloom);
/** ***************************************************************************
* Returns version string compiled into library.
*
* Return: version string
*
*/
const char * bloom_version();
#ifdef __cplusplus
}
#endif
#endif