Skip to content

Commit e0b7da0

Browse files
committed
initial commit for sparse sketch
1 parent 7b78f1f commit e0b7da0

File tree

3 files changed

+50
-7
lines changed

3 files changed

+50
-7
lines changed

include/graph_sketch_driver.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,6 @@ class GraphSketchDriver {
156156
size_t updates = stream->get_update_buffer(update_array, update_array_size);
157157

158158
if (update_array[updates - 1].type == BREAKPOINT) {
159-
#ifdef VERIFY_SAMPLES_F
160-
std::lock_guard<std::mutex> lk(verifier_mtx);
161-
verifier->combine(local_verifier);
162-
#endif
163159
--updates;
164160
got_breakpoint = true;
165161
}
@@ -173,7 +169,13 @@ class GraphSketchDriver {
173169
#endif
174170
}
175171

176-
if (got_breakpoint) return;
172+
if (got_breakpoint) {
173+
#ifdef VERIFY_SAMPLES_F
174+
std::lock_guard<std::mutex> lk(verifier_mtx);
175+
verifier->combine(local_verifier);
176+
#endif
177+
return;
178+
}
177179
}
178180
};
179181

@@ -204,6 +206,7 @@ class GraphSketchDriver {
204206
// Callback that receives a batch of destination vertices for one source vertex.
// It adds the batch size to total_updates.
// thr_id, src_vertex and dst_vertices are the arguments of the
// sketching_alg->apply_update_batch() call at the end of the body.
inline void batch_callback(int thr_id, node_id_t src_vertex,
205207
const std::vector<node_id_t> &dst_vertices) {
206208
total_updates += dst_vertices.size();
209+
// NOTE(review): this unconditional return makes the apply_update_batch() call below
// unreachable, so batches are counted but never applied to the sketching algorithm.
// Presumably a temporary short-circuit for this work-in-progress commit -- confirm and
// remove before relying on sketch contents.
return;
207210
sketching_alg->apply_update_batch(thr_id, src_vertex, dst_vertices);
208211
}
209212

include/sketch.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,19 @@
1111
#include "util.h"
1212
#include "bucket.h"
1313

14+
// TODO: Do we want to use row major or column major order?
15+
// TODO: How do we want to handle raw_bucket_merge() and get_readonly_bucket_ptr()?
16+
// These functions are nice for performance because we can skip serialization but aren't
17+
// strictly necessary.
18+
// TODO: It would be nice to preallocate the structure if we know how big it's probably going to be.
19+
// This would be helpful for delta sketches for example.
20+
// TODO: What are we doing with the num_buckets variable? Could be nice to just be the size of
21+
// buckets array. It could also be an upper bound on the size.
22+
23+
// A strategy that could work well would be to allocate a chunk of memory some of which is given to
24+
// the dense region of the sketch and 3 * sizeof(uint64_t) are given to sparse region.
25+
// 3 -> position, alpha, gamma (could save a little more space by using 16 bits for position)
26+
1427
// enum SerialType {
1528
// FULL,
1629
// RANGE,
@@ -43,14 +56,34 @@ class Sketch {
4356
size_t num_samples; // number of samples we can perform
4457
size_t cols_per_sample; // number of columns to use on each sample
4558
size_t num_columns; // Total number of columns. (product of above 2)
46-
size_t bkt_per_col; // number of buckets per column
59+
size_t bkt_per_col; // maximum number of buckets per column (max number of rows)
4760
size_t num_buckets; // number of total buckets (product of above 2)
4861

4962
size_t sample_idx = 0; // number of samples performed so far
5063

51-
// bucket data
64+
// bucket data, stored densely
5265
Bucket* buckets;
5366

67+
#ifndef L0_FULLY_DENSE
68+
size_t num_dense_rows = 4;
69+
70+
// sparse representation of lower levels of Matrix
71+
// TODO: Evaluate whether this is a reasonable data structure. It probably is not.
72+
std::vector<std::unordered_map<size_t, Bucket>> bucket_buffer;
73+
size_t number_of_sparse_buckets = 0;
74+
size_t sparse_capacity = 2 * num_columns; // TODO: evaluate implications of this constant
75+
76+
/**
77+
* Reallocates the dense region of the sketch to have a different number of rows
78+
* @param new_num_rows the new number of rows to store densely
79+
*/
80+
void reallocate_dense_region(size_t new_num_rows);
81+
#endif
82+
83+
// Accessor declared to return a reference to the sketch's deterministic bucket.
// NOTE(review): the body is an unimplemented stub with no return statement. Flowing off
// the end of a function with a non-void return type is undefined behavior in C++, so
// this must not be called until it is implemented.
inline Bucket& get_deterministic_bucket() {
84+
// TODO: implement this
85+
}
86+
5487
public:
5588
/**
5689
* The below constructors use vector length as their input. However, in graph sketching our input

src/sketch.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,20 @@ Sketch::Sketch(vec_t vector_len, uint64_t seed, size_t _samples, size_t _cols) :
1111
num_columns = num_samples * cols_per_sample;
1212
bkt_per_col = calc_bkt_per_col(vector_len);
1313
num_buckets = num_columns * bkt_per_col + 1; // plus 1 for deterministic bucket
14+
15+
#ifndef L0_FULLY_DENSE
16+
buckets = new Bucket[num_columns * num_dense_rows];
17+
// TODO: AHHHHHHHHH
18+
#else
1419
buckets = new Bucket[num_buckets];
1520

1621
// initialize bucket values
1722
for (size_t i = 0; i < num_buckets; ++i) {
1823
buckets[i].alpha = 0;
1924
buckets[i].gamma = 0;
2025
}
26+
#endif
27+
2128
}
2229

2330
Sketch::Sketch(vec_t vector_len, uint64_t seed, std::istream &binary_in, size_t _samples,

0 commit comments

Comments
 (0)