Skip to content

Commit 0562a19

Browse files
committed
nonstatic seed + move semantics
1 parent f1bd77c commit 0562a19

File tree

4 files changed

+91
-50
lines changed

4 files changed

+91
-50
lines changed

include/sketch/sketch_columns.h

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,19 @@ class FixedSizeSketchColumn {
1616
private:
1717
Bucket *buckets;
1818
Bucket deterministic_bucket = {0, 0};
19-
uint16_t col_idx; // determines column seeding
19+
uint64_t seed;
2020
uint8_t capacity;
2121
public:
22-
static uint64_t seed;
23-
static void set_seed(uint64_t new_seed) {
22+
void set_seed(uint64_t new_seed) {
2423
seed = new_seed;
2524
};
26-
static const uint64_t get_seed() {
25+
uint64_t get_seed() const {
2726
return seed;
2827
};
2928

30-
FixedSizeSketchColumn(uint8_t capacity, uint16_t col_idx);
29+
FixedSizeSketchColumn(uint8_t capacity, uint64_t seed);
3130
FixedSizeSketchColumn(const FixedSizeSketchColumn &other);
31+
FixedSizeSketchColumn(FixedSizeSketchColumn &&other);
3232
~FixedSizeSketchColumn();
3333
SketchSample<vec_t> sample() const;
3434
void clear();
@@ -60,10 +60,13 @@ class FixedSizeSketchColumn {
6060
}
6161
return true;
6262
}
63+
// move assignment operator
64+
FixedSizeSketchColumn& operator=(FixedSizeSketchColumn &&other);
65+
6366
friend std::ostream& operator<<(std::ostream &os, const FixedSizeSketchColumn &sketch) {
6467
os << "FixedSizeSketchColumn: " << std::endl;
6568
os << "Capacity: " << (int)sketch.capacity << std::endl;
66-
os << "Column Index: " << (int)sketch.col_idx << std::endl;
69+
os << "Column Seed: " << (int)sketch.seed << std::endl;
6770
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
6871
for (size_t i = 0; i < sketch.capacity; ++i) {
6972
os << "Bucket[" << i << "]: " << sketch.buckets[i] << std::endl;
@@ -76,17 +79,18 @@ class FixedSizeSketchColumn {
7679

7780
class ResizeableSketchColumn {
7881
private:
79-
static uint64_t seed;
8082
Bucket *buckets;
8183
Bucket deterministic_bucket = {0, 0};
82-
uint16_t col_idx; // determines column seeding
84+
uint64_t seed;
8385
uint8_t capacity;
8486
public:
85-
static void set_seed(uint64_t new_seed) { seed = new_seed; };
86-
static const uint64_t get_seed() { return seed; };
87+
void set_seed(uint64_t new_seed) { seed = new_seed; };
88+
uint64_t get_seed() const { return seed; };
8789

88-
ResizeableSketchColumn(uint8_t start_capacity, uint16_t col_idx);
90+
ResizeableSketchColumn(uint8_t start_capacity, uint64_t seed);
8991
ResizeableSketchColumn(const ResizeableSketchColumn &other);
92+
ResizeableSketchColumn(ResizeableSketchColumn &&other);
93+
ResizeableSketchColumn& operator=(ResizeableSketchColumn &&other);
9094
~ResizeableSketchColumn();
9195
SketchSample<vec_t> sample() const;
9296
void clear();
@@ -112,7 +116,7 @@ class ResizeableSketchColumn {
112116
friend std::ostream& operator<<(std::ostream &os, const ResizeableSketchColumn&sketch) {
113117
os << "ResizeableSketchColumn: " << std::endl;
114118
os << "Capacity: " << (int)sketch.capacity << std::endl;
115-
os << "Column Index: " << (int)sketch.col_idx << std::endl;
119+
os << "Column Seed: " << (int)sketch.seed << std::endl;
116120
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
117121
for (size_t i = 0; i < sketch.capacity; ++i) {
118122
os << "Bucket[" << i << "]: " << sketch.buckets[i] << std::endl;
@@ -139,17 +143,18 @@ class ResizeableSketchColumn {
139143

140144
class ResizeableAlignedSketchColumn {
141145
private:
142-
static uint64_t seed;
143146
hwy::AlignedFreeUniquePtr<Bucket[]> aligned_buckets;
144147
Bucket deterministic_bucket = {0, 0};
145-
uint16_t col_idx; // determines column seeding
148+
uint64_t seed;
146149
uint8_t capacity;
147150
public:
148-
static void set_seed(uint64_t new_seed) { seed = new_seed; };
149-
static const uint64_t get_seed() { return seed; };
151+
void set_seed(uint64_t new_seed) { seed = new_seed; };
152+
const uint64_t get_seed() { return seed; };
150153

151-
ResizeableAlignedSketchColumn(uint8_t start_capacity, uint16_t col_idx);
154+
ResizeableAlignedSketchColumn(uint8_t start_capacity, uint64_t seed);
152155
ResizeableAlignedSketchColumn(const ResizeableAlignedSketchColumn &other);
156+
ResizeableAlignedSketchColumn(ResizeableAlignedSketchColumn &&other);
157+
ResizeableAlignedSketchColumn& operator=(ResizeableAlignedSketchColumn &&other);
153158
~ResizeableAlignedSketchColumn();
154159
SketchSample<vec_t> sample() const;
155160
void clear();
@@ -175,7 +180,7 @@ class ResizeableAlignedSketchColumn {
175180
friend std::ostream& operator<<(std::ostream &os, const ResizeableAlignedSketchColumn&sketch) {
176181
os << "ResizeableSketchColumn: " << std::endl;
177182
os << "Capacity: " << (int)sketch.capacity << std::endl;
178-
os << "Column Index: " << (int)sketch.col_idx << std::endl;
183+
os << "Column Seed: " << (int)sketch.seed << std::endl;
179184
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
180185
for (size_t i = 0; i < sketch.capacity; ++i) {
181186
os << "Bucket[" << i << "]: " << sketch.aligned_buckets[i] << std::endl;

include/sketch/sketch_concept.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,13 @@ concept SketchColumnConcept = requires(T t, T other) {
5151
{ t.reset_sample_state()} -> std::same_as<void>;
5252
{ t == other } -> std::same_as<bool>;
5353
// copy constructor required
54-
requires std::constructible_from<T, const T&>;
55-
// constructor with capacity hint, column index for seeding
56-
requires std::constructible_from<T, uint8_t, uint16_t>;
54+
// requires std::constructible_from<T, const T&>;
55+
requires std::copy_constructible<T>;
56+
// move constructor and assignment required
57+
requires std::move_constructible<T>;
58+
requires std::assignable_from<T&, T>;
59+
// constructor with capacity hint, and a seed
60+
requires std::constructible_from<T, uint8_t, uint64_t>;
5761
{ T::suggest_capacity(std::declval<size_t>()) } -> std::same_as<uint8_t>;
5862
};
5963

src/sketch_columns.cpp

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,37 @@
11
#include "sketch/sketch_columns.h"
22

3-
FixedSizeSketchColumn::FixedSizeSketchColumn(uint8_t capacity, uint16_t col_idx) :
4-
capacity(capacity), col_idx(col_idx) {
3+
FixedSizeSketchColumn::FixedSizeSketchColumn(uint8_t capacity, uint64_t seed) :
4+
capacity(capacity), seed(seed) {
55
buckets = new Bucket[capacity];
66
std::memset(buckets, 0, capacity * sizeof(Bucket));
77
}
88

99
FixedSizeSketchColumn::FixedSizeSketchColumn(const FixedSizeSketchColumn &other) :
10-
capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
10+
capacity(other.capacity), seed(other.seed), deterministic_bucket(other.deterministic_bucket) {
1111
buckets = new Bucket[capacity];
1212
std::memcpy(buckets, other.buckets, capacity * sizeof(Bucket));
1313
}
1414

15+
// Move constructor: copies the scalar state (capacity, seed, deterministic
// bucket) and takes over other's bucket array instead of duplicating it.
// Afterwards other.buckets is nullptr, so other may only be destroyed or
// move-assigned to.
FixedSizeSketchColumn::FixedSizeSketchColumn(FixedSizeSketchColumn &&other) :
    capacity(other.capacity), seed(other.seed), deterministic_bucket(other.deterministic_bucket) {
  buckets = other.buckets;
  // Moving a raw pointer is just a copy; unless the source is cleared, both
  // destructors would delete[] the same array.
  other.buckets = nullptr;
}
19+
20+
// Move assignment: frees the bucket array this column currently owns and
// adopts other's array and scalar state. Self-assignment is a no-op.
FixedSizeSketchColumn& FixedSizeSketchColumn::operator=(FixedSizeSketchColumn &&other) {
  if (this == &other) {
    return *this;
  }

  // release our own storage before taking over the incoming array
  delete[] buckets;
  buckets = other.buckets;
  other.buckets = nullptr;  // other's destructor must not free the array we now own

  deterministic_bucket = other.deterministic_bucket;
  seed = other.seed;
  capacity = other.capacity;
  return *this;
}
32+
1533
FixedSizeSketchColumn::~FixedSizeSketchColumn() {
34+
// note nullptr is safe to delete
1635
delete[] buckets;
1736
}
1837

@@ -29,8 +48,8 @@ uint8_t FixedSizeSketchColumn::get_depth() const {
2948
void FixedSizeSketchColumn::serialize(std::ostream &binary_out) const {
3049
binary_out.write((char *) buckets, capacity * sizeof(Bucket));
3150
binary_out.write((char *) &deterministic_bucket, sizeof(Bucket));
51+
binary_out.write((char *) &seed, sizeof(uint64_t));
3252
binary_out.write((char *) &capacity, sizeof(uint8_t));
33-
binary_out.write((char *) &col_idx, sizeof(uint8_t));
3453
}
3554

3655
SketchSample<vec_t> FixedSizeSketchColumn::sample() const {
@@ -59,25 +78,45 @@ void FixedSizeSketchColumn::merge(FixedSizeSketchColumn &other) {
5978

6079
void FixedSizeSketchColumn::update(const vec_t update) {
6180
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update, seed);
62-
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed + col_idx, capacity-1);
81+
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed, capacity-1);
6382
// assert(depth < capacity);
6483
buckets[depth] ^= {update, checksum};
6584
deterministic_bucket ^= {update, checksum};
6685
}
6786

6887

69-
ResizeableSketchColumn::ResizeableSketchColumn(uint8_t start_capacity, uint16_t col_idx) :
70-
capacity(start_capacity), col_idx(col_idx) {
88+
ResizeableSketchColumn::ResizeableSketchColumn(uint8_t start_capacity, uint64_t seed) :
89+
capacity(start_capacity), seed(seed) {
7190
buckets = new Bucket[start_capacity];
7291
std::memset(buckets, 0, capacity * sizeof(Bucket));
7392
}
7493

7594
ResizeableSketchColumn::ResizeableSketchColumn(const ResizeableSketchColumn &other) :
76-
capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
95+
capacity(other.capacity), seed(other.seed), deterministic_bucket(other.deterministic_bucket) {
7796
buckets = new Bucket[capacity];
7897
std::memcpy(buckets, other.buckets, capacity * sizeof(Bucket));
7998
}
8099

100+
// Move constructor: copies the scalar state and takes ownership of other's
// bucket array, leaving other with a null bucket pointer.
ResizeableSketchColumn::ResizeableSketchColumn(ResizeableSketchColumn &&other) :
    buckets(other.buckets),
    deterministic_bucket(other.deterministic_bucket),
    seed(other.seed),
    capacity(other.capacity) {
  other.buckets = nullptr;
}
106+
107+
// Move assignment: frees the bucket array this column currently owns and
// adopts other's array and scalar state. Self-assignment is a no-op.
ResizeableSketchColumn& ResizeableSketchColumn::operator=(ResizeableSketchColumn &&other) {
  if (this == &other) {
    return *this;
  }

  // give up the old storage, then steal the source's array
  delete[] buckets;
  buckets = other.buckets;
  other.buckets = nullptr;  // keeps other's destructor from freeing our array

  deterministic_bucket = other.deterministic_bucket;
  seed = other.seed;
  capacity = other.capacity;
  return *this;
}
119+
81120
ResizeableSketchColumn::~ResizeableSketchColumn() {
82121
delete[] buckets;
83122
}
@@ -107,8 +146,8 @@ void ResizeableSketchColumn::clear() {
107146
void ResizeableSketchColumn::serialize(std::ostream &binary_out) const {
108147
binary_out.write((char *) buckets, capacity * sizeof(Bucket));
109148
binary_out.write((char *) &deterministic_bucket, sizeof(Bucket));
149+
binary_out.write((char *) &seed, sizeof(uint64_t));
110150
binary_out.write((char *) &capacity, sizeof(uint8_t));
111-
binary_out.write((char *) &col_idx, sizeof(uint8_t));
112151
}
113152

114153
SketchSample<vec_t> ResizeableSketchColumn::sample() const {
@@ -128,7 +167,7 @@ void ResizeableSketchColumn::update(const vec_t update) {
128167
// TODO - remove magic number
129168
// TODO - get_index_depth needs to be fixed. hashes need to be longer
130169
// than 32 bits if we're not using the deep bucket buffer idea.
131-
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed + col_idx, 60);
170+
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed, 60);
132171
deterministic_bucket ^= {update, checksum};
133172

134173
if (depth >= capacity) {
@@ -160,16 +199,16 @@ uint8_t ResizeableSketchColumn::get_depth() const {
160199

161200

162201

163-
ResizeableAlignedSketchColumn::ResizeableAlignedSketchColumn(uint8_t start_capacity, uint16_t col_idx) :
164-
capacity(start_capacity), col_idx(col_idx) {
202+
ResizeableAlignedSketchColumn::ResizeableAlignedSketchColumn(uint8_t start_capacity, uint64_t seed) :
203+
capacity(start_capacity), seed(seed) {
165204

166205
// auto aligned_memptr = hwy::MakeUniqueAlignedArray<Bucket>(start_capacity);
167206
aligned_buckets = hwy::AllocateAligned<Bucket>(start_capacity);
168207
std::memset(aligned_buckets.get(), 0, capacity * sizeof(Bucket));
169208
}
170209

171210
ResizeableAlignedSketchColumn::ResizeableAlignedSketchColumn(const ResizeableAlignedSketchColumn &other) :
172-
capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
211+
capacity(other.capacity), seed(other.seed), deterministic_bucket(other.deterministic_bucket) {
173212
aligned_buckets = hwy::AllocateAligned<Bucket>(capacity);
174213
std::memcpy(aligned_buckets.get(), other.aligned_buckets.get(), capacity * sizeof(Bucket));
175214
}
@@ -200,8 +239,8 @@ void ResizeableAlignedSketchColumn::clear() {
200239
void ResizeableAlignedSketchColumn::serialize(std::ostream &binary_out) const {
201240
binary_out.write((char *) aligned_buckets.get(), capacity * sizeof(Bucket));
202241
binary_out.write((char *) &deterministic_bucket, sizeof(Bucket));
242+
binary_out.write((char *) &seed, sizeof(uint64_t));
203243
binary_out.write((char *) &capacity, sizeof(uint8_t));
204-
binary_out.write((char *) &col_idx, sizeof(uint8_t));
205244
}
206245

207246
SketchSample<vec_t> ResizeableAlignedSketchColumn::sample() const {
@@ -221,7 +260,7 @@ void ResizeableAlignedSketchColumn::update(const vec_t update) {
221260
// TODO - remove magic number
222261
// TODO - get_index_depth needs to be fixed. hashes need to be longer
223262
// than 32 bits if we're not using the deep bucket buffer idea.
224-
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed + col_idx, 60);
263+
col_hash_t depth = Bucket_Boruvka::get_index_depth_legacy(update, seed, 60);
225264
deterministic_bucket ^= {update, checksum};
226265

227266
if (depth >= capacity) {
@@ -252,11 +291,6 @@ uint8_t ResizeableAlignedSketchColumn::get_depth() const {
252291
return 0;
253292
}
254293

255-
uint64_t ResizeableSketchColumn::seed = 0;
256-
uint64_t FixedSizeSketchColumn::seed = 0;
257-
uint64_t ResizeableAlignedSketchColumn::seed = 0;
258-
259-
260294

261295
static_assert(SketchColumnConcept<FixedSizeSketchColumn, vec_t>,
262296
"FixedSizeSketchColumn does not satisfy SketchColumnConcept");

test/sketch_test.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -478,13 +478,12 @@ TEST(SketchTestSuite, TestRawBucketUpdate) {
478478
ASSERT_GT(successes, 0);
479479
}
480480

481-
// using TestDefaultSketchColumn = FixedSizeSketchColumn;
482-
using TestDefaultSketchColumn = ResizeableSketchColumn;
481+
using TestDefaultSketchColumn = FixedSizeSketchColumn;
482+
// using TestDefaultSketchColumn = ResizeableSketchColumn;
483483

484484
TEST(SketchColumnTestSuite, TestSketchColumnSampling) {
485-
TestDefaultSketchColumn::set_seed(get_seed());
486485
// ResizeableSketchColumn::seed = get_seed();
487-
TestDefaultSketchColumn column(18, 0);
486+
TestDefaultSketchColumn column(18, get_seed());
488487
column.update(10);
489488
auto sample = column.sample();
490489
ASSERT_EQ(sample.result, GOOD);
@@ -503,10 +502,10 @@ TEST(SketchColumnTestSuite, TestSketchColumnSampling) {
503502

504503
TEST(SketchColumnTestSuite, TestSketchColumnMerging) {
505504
size_t num_nodes = 1 << 12;
506-
TestDefaultSketchColumn::set_seed(get_seed());
505+
auto seed = get_seed();
507506
for (size_t col_idx =0; col_idx < 16; col_idx++) {
508-
TestDefaultSketchColumn column1(TestDefaultSketchColumn::suggest_capacity(num_nodes), col_idx);
509-
TestDefaultSketchColumn column2(TestDefaultSketchColumn::suggest_capacity(num_nodes), col_idx);
507+
TestDefaultSketchColumn column1(TestDefaultSketchColumn::suggest_capacity(num_nodes), seed + col_idx);
508+
TestDefaultSketchColumn column2(TestDefaultSketchColumn::suggest_capacity(num_nodes), seed + col_idx);
510509
for (vec_t i = 0; i < (1 << 11); i++) {
511510
column1.update(i);
512511
column2.update(i + 128);
@@ -526,10 +525,9 @@ TEST(SketchColumnTestSuite, TestSketchColumnMerging) {
526525
}
527526

528527
TEST(SketchColumnTestSuite, TestSketchColumnMergeMany) {
529-
TestDefaultSketchColumn::set_seed(get_seed());
530528
std::vector<TestDefaultSketchColumn> columns(
531529
1 << 13, TestDefaultSketchColumn(
532-
TestDefaultSketchColumn::suggest_capacity(1 << 14), 0));
530+
TestDefaultSketchColumn::suggest_capacity(1 << 14), get_seed()));
533531
for (size_t i = 0; i < columns.size(); i++) {
534532
columns[i].update(i);
535533
}

0 commit comments

Comments
 (0)