Skip to content

Commit 11574b2

Browse files
authored
Merge pull request #530 from Chia-Network/fix-incremental-serialization
fix Serializer to correctly restore()
2 parents: 2eee1cf + e03dde6; commit 11574b2

File tree

7 files changed: 163 additions and 41 deletions.

benches/serialize.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ fn serialize_benchmark(c: &mut Criterion) {
3535
group.bench_function(format!("Serializer {name}"), |b| {
3636
b.iter(|| {
3737
let start = Instant::now();
38-
let mut ser = Serializer::default();
39-
let _ = ser.add(&a, node, None);
38+
let mut ser = Serializer::new(None);
39+
let _ = ser.add(&a, node);
4040
black_box(ser.into_inner());
4141
start.elapsed()
4242
})

fuzz/fuzz_targets/incremental_serializer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
9494
{
9595
node_idx += 1;
9696

97-
let mut ser = Serializer::new();
98-
let (done, _) = ser.add(&allocator, first_step, Some(sentinel)).unwrap();
97+
let mut ser = Serializer::new(Some(sentinel));
98+
let (done, _) = ser.add(&allocator, first_step).unwrap();
9999
assert!(!done);
100-
let (done, _) = ser.add(&allocator, second_step, None).unwrap();
100+
let (done, _) = ser.add(&allocator, second_step).unwrap();
101101
assert!(done);
102102

103103
// now, make sure that we deserialize to the exact same structure, by

fuzz/fuzz_targets/serializer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
1919

2020
let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();
2121

22-
let mut ser = Serializer::new();
23-
let (done, _) = ser.add(&allocator, program, None).unwrap();
22+
let mut ser = Serializer::new(None);
23+
let (done, _) = ser.add(&allocator, program).unwrap();
2424
assert!(done);
2525
let b2 = ser.into_inner();
2626

src/serde/identity_hash.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ impl Hasher for IdentityHash {
2525
}
2626
}
2727

28+
#[derive(Clone)]
2829
pub struct RandomState(u64);
2930

3031
impl Default for RandomState {

src/serde/incremental.rs

Lines changed: 152 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,33 +27,30 @@ pub struct Serializer {
2727
thc: ObjectCache<Bytes32>,
2828
slc: ObjectCache<u64>,
2929

30+
sentinel: Option<NodePtr>,
3031
output: Cursor<Vec<u8>>,
3132
}
3233

33-
impl Default for Serializer {
34-
fn default() -> Self {
35-
Self::new()
36-
}
37-
}
38-
3934
#[derive(Clone)]
4035
pub struct UndoState {
4136
read_op_stack: Vec<ReadOp>,
4237
write_stack: Vec<NodePtr>,
38+
read_cache_lookup: ReadCacheLookup,
4339
output_position: u64,
4440
}
4541

4642
/// The state to allow incrementally serializing CLVM structures with back-refs
4743
/// The compression cannot "see through" the sentinel node, so some compression
4844
/// opportunities may be missed when serializing and compressing incrementally.
4945
impl Serializer {
50-
pub fn new() -> Self {
46+
pub fn new(sentinel: Option<NodePtr>) -> Self {
5147
Self {
5248
read_op_stack: vec![ReadOp::Parse],
5349
write_stack: vec![],
5450
read_cache_lookup: ReadCacheLookup::new(),
5551
thc: ObjectCache::new(treehash),
5652
slc: ObjectCache::new(serialized_length),
53+
sentinel,
5754
output: Cursor::new(vec![]),
5855
}
5956
}
@@ -72,25 +69,21 @@ impl Serializer {
7269
/// beginning if this is the first call. Returns true when we're done
7370
/// serializing. i.e. no sentinel token was encountered. Once this function
7471
/// returns true, it may not be called again.
75-
pub fn add(
76-
&mut self,
77-
a: &Allocator,
78-
node: NodePtr,
79-
sentinel: Option<NodePtr>,
80-
) -> io::Result<(bool, UndoState)> {
72+
pub fn add(&mut self, a: &Allocator, node: NodePtr) -> io::Result<(bool, UndoState)> {
8173
// once we're done serializing (i.e. there was no sentinel in the last
8274
// call to add()), we can't resume
8375
assert!(!self.read_op_stack.is_empty());
8476

8577
let undo_state = UndoState {
8678
read_op_stack: self.read_op_stack.clone(),
8779
write_stack: self.write_stack.clone(),
80+
read_cache_lookup: self.read_cache_lookup.clone(),
8881
output_position: self.output.position(),
8982
};
9083
self.write_stack.push(node);
9184

9285
while let Some(node_to_write) = self.write_stack.pop() {
93-
if Some(node_to_write) == sentinel {
86+
if Some(node_to_write) == self.sentinel {
9487
// we're not done serializing yet, we're stopping, and the
9588
// caller will call add() again with the node to serialize
9689
// here
@@ -99,8 +92,9 @@ impl Serializer {
9992
let op = self.read_op_stack.pop();
10093
assert!(op == Some(ReadOp::Parse));
10194

102-
let node_serialized_length = self.slc.get_or_calculate(a, &node_to_write, sentinel);
103-
let node_tree_hash = self.thc.get_or_calculate(a, &node_to_write, sentinel);
95+
let node_serialized_length =
96+
self.slc.get_or_calculate(a, &node_to_write, self.sentinel);
97+
let node_tree_hash = self.thc.get_or_calculate(a, &node_to_write, self.sentinel);
10498
if let (Some(node_tree_hash), Some(node_serialized_length)) =
10599
(node_tree_hash, node_serialized_length)
106100
{
@@ -148,6 +142,7 @@ impl Serializer {
148142
pub fn restore(&mut self, state: UndoState) {
149143
self.read_op_stack = state.read_op_stack;
150144
self.write_stack = state.write_stack;
145+
self.read_cache_lookup = state.read_cache_lookup;
151146
self.output.set_position(state.output_position);
152147
self.output
153148
.get_mut()
@@ -168,8 +163,6 @@ impl Serializer {
168163
/// It's only valid to convert to the inner serialized form once
169164
/// serialization is complete. i.e. after add() returns true.
170165
pub fn into_inner(self) -> Vec<u8> {
171-
// if the sentinel is set, it means we're in the middle of serialization
172-
// still
173166
assert!(self.read_op_stack.is_empty());
174167
self.output.into_inner()
175168
}
@@ -192,18 +185,18 @@ mod tests {
192185
let item = node_from_bytes(&mut a, &hex!("ffff0102ff0304")).unwrap();
193186
let list = a.new_pair(item, sentinel).unwrap();
194187

195-
let mut ser = Serializer::new();
188+
let mut ser = Serializer::new(Some(sentinel));
196189
let mut size = ser.size();
197190
for _ in 0..10 {
198191
// this keeps returning false because we encounter a sentinel
199-
let (done, _) = ser.add(&a, list, Some(sentinel)).unwrap();
192+
let (done, _) = ser.add(&a, list).unwrap();
200193
assert!(!done);
201194
assert!(ser.size() > size);
202195
size = ser.size();
203196
}
204197

205198
// this returns true because we're done now
206-
let (done, _) = ser.add(&a, NodePtr::NIL, None).unwrap();
199+
let (done, _) = ser.add(&a, NodePtr::NIL).unwrap();
207200
assert!(done);
208201

209202
let output = ser.into_inner();
@@ -243,10 +236,10 @@ mod tests {
243236
let node5 = a.new_pair(node3, node4).unwrap();
244237
let item = a.new_pair(node2, node5).unwrap();
245238

246-
let mut ser = Serializer::new();
239+
let mut ser = Serializer::new(Some(sentinel));
247240
let mut size = ser.size();
248241

249-
let (done, _) = ser.add(&a, item, Some(sentinel)).unwrap();
242+
let (done, _) = ser.add(&a, item).unwrap();
250243
assert!(!done);
251244
assert!(ser.size() > size);
252245
size = ser.size();
@@ -261,14 +254,14 @@ mod tests {
261254

262255
for _ in 0..10 {
263256
// this keeps returning false because we encounter a sentinel
264-
let (done, _) = ser.add(&a, item, Some(sentinel)).unwrap();
257+
let (done, _) = ser.add(&a, item).unwrap();
265258
assert!(!done);
266259
assert!(ser.size() > size);
267260
size = ser.size();
268261
}
269262

270263
// this returns true because we're done now
271-
let (done, _) = ser.add(&a, NodePtr::NIL, None).unwrap();
264+
let (done, _) = ser.add(&a, NodePtr::NIL).unwrap();
272265
assert!(done);
273266

274267
// The "foobar" atom is serialized as 86666f6f626172
@@ -299,13 +292,13 @@ mod tests {
299292
let item = node_from_bytes(&mut a, &hex!("ffff0102ff0304")).unwrap();
300293
let list = a.new_pair(item, sentinel).unwrap();
301294

302-
let mut ser = Serializer::new();
303-
let (done, _) = ser.add(&a, list, Some(sentinel)).unwrap();
295+
let mut ser = Serializer::new(Some(sentinel));
296+
let (done, _) = ser.add(&a, list).unwrap();
304297
assert!(!done);
305298
assert_eq!(ser.size(), 8);
306299
assert_eq!(hex::encode(ser.get_ref()), "ffffff0102ff0304");
307300

308-
let (done, state) = ser.add(&a, NodePtr::NIL, None).unwrap();
301+
let (done, state) = ser.add(&a, NodePtr::NIL).unwrap();
309302
assert!(done);
310303
assert_eq!(ser.size(), 9);
311304
assert_eq!(hex::encode(ser.get_ref()), "ffffff0102ff030480");
@@ -315,17 +308,17 @@ mod tests {
315308
assert_eq!(ser.size(), 8);
316309
assert_eq!(hex::encode(ser.get_ref()), "ffffff0102ff0304");
317310

318-
let (done, _) = ser.add(&a, item, None).unwrap();
311+
let (done, _) = ser.add(&a, item).unwrap();
319312
assert!(done);
320313

321314
assert_eq!(ser.size(), 10);
322-
assert_eq!(hex::encode(ser.get_ref()), "ffffff0102ff0304fe04");
315+
assert_eq!(hex::encode(ser.get_ref()), "ffffff0102ff0304fe02");
323316

324317
ser.restore(state);
325318

326319
let item = a.new_small_number(1337).unwrap();
327320

328-
let (done, _) = ser.add(&a, item, None).unwrap();
321+
let (done, _) = ser.add(&a, item).unwrap();
329322

330323
assert!(done);
331324
assert_eq!(ser.size(), 11);
@@ -334,4 +327,132 @@ mod tests {
334327
let output = ser.into_inner();
335328
assert_eq!(hex::encode(&output), "ffffff0102ff0304820539");
336329
}
330+
331+
#[test]
332+
fn test_incremental_restore() {
333+
let mut a = Allocator::new();
334+
335+
let sentinel = a.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap();
336+
// ((0x000000000000 . 0x111111111111) . (0x222222222222 . 0x333333333333))
337+
let item = node_from_bytes(
338+
&mut a,
339+
&hex!("ffff8600000000000086111111111111ff8622222222222286333333333333"),
340+
)
341+
.unwrap();
342+
let item1 = a.new_pair(item, sentinel).unwrap();
343+
344+
// ((0x111111111111 . 0x000000000000) . (0x222222222222 . 0x333333333333))
345+
let item = node_from_bytes(
346+
&mut a,
347+
&hex!("ffff8611111111111186000000000000ff8622222222222286333333333333"),
348+
)
349+
.unwrap();
350+
let item2 = a.new_pair(item, sentinel).unwrap();
351+
352+
// ((0x000000000000 . 0x111111111111) . (0x333333333333 . 0x222222222222))
353+
let item = node_from_bytes(
354+
&mut a,
355+
&hex!("ffff8600000000000086111111111111ff8633333333333386222222222222"),
356+
)
357+
.unwrap();
358+
let item3 = a.new_pair(item, sentinel).unwrap();
359+
360+
// add item1, item2, item3
361+
// restore to after item1
362+
// add item3, item2
363+
// terminate the list
364+
let mut ser = Serializer::new(Some(sentinel));
365+
let (done, _) = ser.add(&a, item1).unwrap();
366+
assert!(!done);
367+
println!("{}", hex::encode(ser.get_ref()));
368+
let (done, restore_state) = ser.add(&a, item2).unwrap();
369+
assert!(!done);
370+
println!("{}", hex::encode(ser.get_ref()));
371+
let (done, _) = ser.add(&a, item3).unwrap();
372+
assert!(!done);
373+
println!("{}", hex::encode(ser.get_ref()));
374+
println!("restore");
375+
ser.restore(restore_state);
376+
println!("{}", hex::encode(ser.get_ref()));
377+
378+
let (done, _) = ser.add(&a, item3).unwrap();
379+
assert!(!done);
380+
println!("{}", hex::encode(ser.get_ref()));
381+
let (done, _) = ser.add(&a, item2).unwrap();
382+
assert!(!done);
383+
println!("{}", hex::encode(ser.get_ref()));
384+
385+
let (done, _) = ser.add(&a, NodePtr::NIL).unwrap();
386+
assert!(done);
387+
println!("{}", hex::encode(ser.get_ref()));
388+
389+
let output = ser.into_inner();
390+
391+
{
392+
let mut a = Allocator::new();
393+
let result = node_from_bytes_backrefs(&mut a, &output).expect("invalid serialization");
394+
let roundtrip = node_to_bytes(&a, result).expect("failed to serialize");
395+
assert_eq!(
396+
hex::encode(roundtrip),
397+
"
398+
ff
399+
ff
400+
ff
401+
86000000000000
402+
86111111111111
403+
ff
404+
86222222222222
405+
86333333333333
406+
ff
407+
ff
408+
ff
409+
86000000000000
410+
86111111111111
411+
ff
412+
86333333333333
413+
86222222222222
414+
ff
415+
ff
416+
ff
417+
86111111111111
418+
86000000000000
419+
ff
420+
86222222222222
421+
86333333333333
422+
80"
423+
.chars()
424+
.filter(|c| !c.is_whitespace())
425+
.collect::<String>()
426+
);
427+
}
428+
429+
assert_eq!(
430+
hex::encode(output),
431+
"
432+
ff
433+
ff
434+
ff
435+
86000000000000
436+
86111111111111
437+
ff
438+
86222222222222
439+
86333333333333
440+
ff
441+
ff
442+
fe04
443+
ff
444+
fe1d
445+
fe2b
446+
ff
447+
ff
448+
ff
449+
fe0c
450+
fe11
451+
fe1b
452+
80"
453+
.chars()
454+
.filter(|c| !c.is_whitespace())
455+
.collect::<String>()
456+
);
457+
}
337458
}

src/serde/read_cache_lookup.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::collections::{HashMap, HashSet};
2222

2323
use super::bytes32::{hash_blob, hash_blobs, Bytes32};
2424

25-
#[derive(Debug)]
25+
#[derive(Debug, Clone)]
2626
pub struct ReadCacheLookup {
2727
root_hash: Bytes32,
2828

src/serde/test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ fn check_round_trip(obj_ser_br_hex: &str) {
4040
let mut allocator = Allocator::new();
4141
let obj = node_from_bytes(&mut allocator, &obj_ser_no_br_1).unwrap();
4242

43-
let mut serializer = Serializer::new();
44-
let (done, _) = serializer.add(&allocator, obj, None).unwrap();
43+
let mut serializer = Serializer::new(None);
44+
let (done, _) = serializer.add(&allocator, obj).unwrap();
4545
assert!(done);
4646
let obj_ser_br_2 = serializer.into_inner();
4747

0 commit comments

Comments
 (0)