@@ -20,7 +20,8 @@ Sketch::Sketch(vec_t vector_len, uint64_t seed, size_t _samples, size_t _cols) :
2020 num_columns = num_samples * cols_per_sample;
2121 bkt_per_col = calc_bkt_per_col (vector_len);
2222 num_buckets = num_columns * bkt_per_col + 1 ; // plus 1 for deterministic bucket
23- bucket_buffer = BucketBuffer ();
23+ // bucket_buffer = BucketBuffer();
24+ bucket_buffer = BucketBufferHashMap ();
2425#ifdef EAGER_BUCKET_CHECK
2526 buckets = (Bucket*) (new char [bucket_array_bytes ()]);
2627 nonempty_buckets = (vec_t *) (buckets + num_buckets);
@@ -53,7 +54,7 @@ Sketch::Sketch(vec_t vector_len, uint64_t seed, bool compressed, std::istream &b
5354 // bkt_per_col = calc_bkt_per_col(vector_len);
5455 num_buckets = num_columns * bkt_per_col + 1 ; // plus 1 for deterministic bucket
5556 // bucket_buffer = BucketBuffer(new BufferEntry[_cols * 2], _cols * 2);
56- bucket_buffer = BucketBuffer ();
57+ bucket_buffer = BucketBufferHashMap ();
5758 buckets = (Bucket*) new char [bucket_array_bytes ()];
5859#ifdef EAGER_BUCKET_CHECK
5960 nonempty_buckets = (vec_t *) (buckets + num_buckets);
@@ -116,7 +117,7 @@ Sketch::Sketch(vec_t vector_len, uint64_t seed, std::istream &binary_in, size_t
116117 bkt_per_col = calc_bkt_per_col (vector_len);
117118 // bkt_per_col = 1;
118119 num_buckets = num_columns * bkt_per_col + 1 ; // plus 1 for deterministic bucket
119- bucket_buffer = BucketBuffer ();
120+ bucket_buffer = BucketBufferHashMap ();
120121 buckets = (Bucket*) new char [bucket_array_bytes ()];
121122#ifdef EAGER_BUCKET_CHECK
122123 nonempty_buckets = (vec_t *) (buckets + num_buckets);
@@ -132,7 +133,7 @@ Sketch::Sketch(const Sketch &s) : seed(s.seed) {
132133 bkt_per_col = s.bkt_per_col ;
133134 num_buckets = s.num_buckets ;
134135 // TODO - do this correctly in other places. Otherwise serialization is broken
135- bucket_buffer = BucketBuffer ();
136+ bucket_buffer = BucketBufferHashMap ();
136137 buckets = (Bucket*) new char [bucket_array_bytes ()];
137138 // buckets = new Bucket[num_buckets];
138139
@@ -195,21 +196,18 @@ Sketch::~Sketch() {
195196 * backwards until we reach the point where the columns are once again not
196197 * being stored
197198 */
198- // bucket_buffer.sort_and_compact();
199- size_t buffer_size = bucket_buffer.size ();
200- // ACTUALLY - we dont need to sort. just need to partition
201- size_t to_keep_sz = bucket_buffer.partition (bkt_per_col);
202- int i = ((int ) buffer_size)-1 ;
203- // while (i >= 0 && bucket_buffer[i].row_idx < bkt_per_col) {
204- while (i >= 0 && i >= to_keep_sz) {
205- // update the bucket
206- get_bucket (bucket_buffer[i].col_idx , bucket_buffer[i].row_idx ) ^= bucket_buffer[i].value ;
207- i--;
208- }
209- bucket_buffer.entries .resize (to_keep_sz);
210- // bucket_buffer.entries.resize(i+1);
211- // if (buffer_size > 3)
212- // std::cout << "Injected buffer buckets:" << buffer_size << " to " << i+1 << std::endl;
199+ auto it = bucket_buffer.entries .begin ();
200+ while (it != bucket_buffer.entries .end ()) {
201+ if (bucket_buffer.key_to_row (it->first ) >= bkt_per_col) {
202+ get_bucket (
203+ bucket_buffer.key_to_col (it->first ),
204+ bucket_buffer.key_to_row (it->first )
205+ ) ^= it->second ;
206+ it = bucket_buffer.entries .erase (it);
207+ } else {
208+ it++;
209+ }
210+ }
213211 }
214212
215213
@@ -322,18 +320,15 @@ SketchSample Sketch::sample() {
322320 }
323321 }
324322 // finally, check the deep buffer
325- for (size_t i = 0 ; i < bucket_buffer.size (); i++) {
326- const BufferEntry &entry = bucket_buffer[i];
327- // TODO - optimize this check. THIS IS GONNA CAUSE REALLY POOR
328- // PERFORMANCE UNTIL WE DO SOMETHING ABOUT IT
329- if (entry.col_idx >= first_column && entry.col_idx < first_column + cols_per_sample) {
330- if (Bucket_Boruvka::is_good (entry.value , checksum_seed ())) {
331- // std::cout << "Found a bucket in the buffer" << std::endl;
332- assert (entry.row_idx >= bkt_per_col);
333- return {entry.value .alpha , GOOD};
334- }
323+ for (size_t col = first_column; col < first_column + cols_per_sample; ++col) {
324+ for (size_t row = bkt_per_col; row < bkt_per_col + 6 ; ++row) {
325+ Bucket bucket = bucket_buffer.get_bucket (col, row);
326+ // Bucket &bucket = bucket_buffer.get_bucket(col, row);
327+ if (Bucket_Boruvka::is_good (bucket, checksum_seed ()))
328+ return {bucket.alpha , GOOD};
335329 }
336330 }
331+
337332 return {0 , FAIL};
338333}
339334
0 commit comments