diff --git a/Cargo.lock b/Cargo.lock index a2a5c86a..47d6dac8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,9 +74,12 @@ dependencies = [ [[package]] name = "arbitrary" -version = "1.3.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "autocfg" @@ -311,6 +314,7 @@ dependencies = [ name = "clvm_rs-fuzz" version = "1.0.0" dependencies = [ + "arbitrary", "chia-sha2", "clvmr", "hex", @@ -480,6 +484,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "digest" version = "0.10.7" diff --git a/Cargo.toml b/Cargo.toml index f88282ce..d32ee52f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ serde_json = "1.0.133" clap = "4.5.20" rand_chacha = "0.3.1" bitvec = "1.0.1" +arbitrary = { version = "1.4.1", features = ["derive"] } [dependencies] lazy_static = { workspace = true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index c794a338..4ebc5daa 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -13,6 +13,7 @@ libfuzzer-sys = { workspace = true } clvmr = { workspace = true } chia-sha2 = { workspace = true } hex = { workspace = true } +arbitrary = { workspace = true } [[bin]] name = "fuzz_run_program" diff --git a/fuzz/fuzz_targets/deserialize_br_rand_tree.rs b/fuzz/fuzz_targets/deserialize_br_rand_tree.rs index 8397b7de..95532218 100644 --- a/fuzz/fuzz_targets/deserialize_br_rand_tree.rs +++ b/fuzz/fuzz_targets/deserialize_br_rand_tree.rs @@ -1,17 +1,17 @@ #![no_main] -mod fuzzing_utils; +mod make_tree; use clvmr::allocator::Allocator; use clvmr::serde::node_from_bytes_backrefs; use clvmr::serde::node_to_bytes_backrefs; use libfuzzer_sys::fuzz_target; -fn do_fuzz(data: &[u8], short_atoms: bool) { +fuzz_target!(|data: &[u8]| { let mut allocator = Allocator::new(); - let mut cursor = fuzzing_utils::BitCursor::new(data); + let mut unstructured = arbitrary::Unstructured::new(data); - let program = fuzzing_utils::make_tree(&mut allocator, &mut cursor, short_atoms); + let program = make_tree::make_tree(&mut allocator, &mut unstructured); let b1 = node_to_bytes_backrefs(&allocator, program).unwrap(); @@ -22,9 +22,4 @@ fn do_fuzz(data: &[u8], short_atoms: bool) { if b1 != b2 { panic!("b1 and b2 do not match"); } -} - -fuzz_target!(|data: &[u8]| { - do_fuzz(data, true); - do_fuzz(data, false); }); diff --git a/fuzz/fuzz_targets/fuzzing_utils.rs b/fuzz/fuzz_targets/fuzzing_utils.rs index 24e30e05..b7656bca 100644 --- a/fuzz/fuzz_targets/fuzzing_utils.rs +++ b/fuzz/fuzz_targets/fuzzing_utils.rs @@ -1,83 +1,6 @@ use chia_sha2::Sha256; use clvmr::allocator::{Allocator, NodePtr, SExp}; -pub struct BitCursor<'a> { - data: &'a [u8], - bit_offset: u8, -} - -fn mask(num: u8) -> u8 { - 0xff >> num -} - -impl<'a> BitCursor<'a> { - pub fn new(data: &'a [u8]) -> Self { - BitCursor { - data, - bit_offset: 0, - } - } - - pub fn read_bits(&mut self, mut num: u8) -> Option { - assert!(num <= 8); - let ret = if self.data.is_empty() { - num = 0; - None - } else if self.bit_offset + num <= 8 { - Some((self.data[0] & mask(self.bit_offset)) >> (8 - num - self.bit_offset)) - } else if self.data.len() < 2 { - num = 8 - self.bit_offset; - Some(self.data[0] & mask(self.bit_offset)) - } else { - let first_byte = 8 - self.bit_offset; - let second_byte = num - first_byte; - Some( - ((self.data[0] & mask(self.bit_offset)) << second_byte) - | (self.data[1] >> (8 - second_byte)), - ) - }; - self.advance(num); - ret - } - - fn advance(&mut self, bits: u8) { - let bits = self.bit_offset as u32 + bits as u32; - if bits >= 8 { - self.data = &self.data[(bits / 8) as usize..]; - } - self.bit_offset = (bits % 8) as u8; - } -} - -const BUFFER: [u8; 63] = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -]; - -pub fn make_tree(a: &mut Allocator, cursor: &mut BitCursor, short_atoms: bool) -> NodePtr { - match cursor.read_bits(1) { - None => a.nil(), - Some(0) => { - let first = make_tree(a, cursor, short_atoms); - let second = make_tree(a, cursor, short_atoms); - a.new_pair(first, second).unwrap() - } - Some(_) => { - if short_atoms { - match cursor.read_bits(8) { - None => a.nil(), - Some(val) => a.new_atom(&[val]).unwrap(), - } - } else { - match cursor.read_bits(6) { - None => a.nil(), - Some(len) => a.new_atom(&BUFFER[..len as usize]).unwrap(), - } - } - } - } -} - #[allow(dead_code)] fn hash_atom(buf: &[u8]) -> [u8; 32] { let mut ctx = Sha256::new(); diff --git a/fuzz/fuzz_targets/incremental_serializer.rs b/fuzz/fuzz_targets/incremental_serializer.rs index 5bb980d8..bc6916f1 100644 --- a/fuzz/fuzz_targets/incremental_serializer.rs +++ b/fuzz/fuzz_targets/incremental_serializer.rs @@ -1,10 +1,10 @@ #![no_main] -mod fuzzing_utils; +mod make_tree; use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes, Serializer}; use clvmr::{Allocator, NodePtr, SExp}; -use fuzzing_utils::{make_tree, BitCursor}; +use make_tree::make_tree_limits; use libfuzzer_sys::fuzz_target; @@ -76,15 +76,18 @@ fn insert_sentinel( // we ensure that serializing a structure in two steps results in a valid form // as well as that it correctly represents the tree. -fn do_fuzz(data: &[u8], short_atoms: bool) { - let mut cursor = BitCursor::new(data); - +fuzz_target!(|data: &[u8]| { + let mut unstructured = arbitrary::Unstructured::new(data); let mut allocator = Allocator::new(); - let program = make_tree(&mut allocator, &mut cursor, short_atoms); + + // since we copy the tree, we must limit the number of pairs created, to not + // exceed the limit of the Allocator + let program = make_tree_limits(&mut allocator, &mut unstructured, 10_000_000, 10_000_000); // this just needs to be a unique NodePtr, that won't appear in the tree let sentinel = allocator.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap(); + let checkpoint = allocator.checkpoint(); // count up intil we've used every node as the sentinel/cut-point let mut node_idx = 0; @@ -108,10 +111,9 @@ fn do_fuzz(data: &[u8], short_atoms: bool) { let b2 = node_to_bytes(&allocator, program).unwrap(); assert_eq!(&hex::encode(&b1), &hex::encode(&b2)); - } -} -fuzz_target!(|data: &[u8]| { - do_fuzz(data, true); - do_fuzz(data, false); + // free the memory used by the last iteration from the allocator, + // otherwise we'll exceed the Allocator limits eventually + allocator.restore_checkpoint(&checkpoint); + } }); diff --git a/fuzz/fuzz_targets/make_tree.rs b/fuzz/fuzz_targets/make_tree.rs new file mode 100644 index 00000000..30f0ac5c --- /dev/null +++ b/fuzz/fuzz_targets/make_tree.rs @@ -0,0 +1,127 @@ +use arbitrary::{Arbitrary, Unstructured}; +use clvmr::{Allocator, NodePtr}; + +enum Op { + Pair(bool), + SubTree, +} + +#[derive(Arbitrary)] +enum NodeType { + Pair, + Bytes, + U8, + U16, + U32, + Previous, +} + +#[allow(dead_code)] +pub fn make_tree(a: &mut Allocator, unstructured: &mut Unstructured) -> NodePtr { + make_tree_limits(a, unstructured, 60_000_000, 60_000_000) +} + +pub fn make_tree_limits( + a: &mut Allocator, + unstructured: &mut Unstructured, + mut max_pairs: i64, + mut max_atoms: i64, +) -> NodePtr { + let mut previous_nodes = Vec::::new(); + let mut value_stack = Vec::::new(); + let mut op_stack = vec![Op::SubTree]; + // the number of Op::SubTree items on the op_stack + let mut sub_trees: i64 = 1; + + while let Some(op) = op_stack.pop() { + match op { + Op::Pair(swap) => { + let left = value_stack.pop().expect("internal error, empty stack"); + let right = value_stack.pop().expect("internal error, empty stack"); + let pair = if swap { + a.new_pair(left, right).expect("out of memory (pair)") + } else { + a.new_pair(right, left).expect("out of memory (pair)") + }; + value_stack.push(pair); + previous_nodes.push(pair); + } + Op::SubTree => { + sub_trees -= 1; + if unstructured.is_empty() { + value_stack.push(NodePtr::NIL); + } else { + match unstructured.arbitrary::() { + Err(..) => value_stack.push(NodePtr::NIL), + Ok(NodeType::Pair) => { + if sub_trees > unstructured.len() as i64 + || max_pairs <= 0 + || max_atoms <= 0 + { + // there isn't much entropy left, don't grow the + // tree anymore + value_stack.push( + *unstructured + .choose(&previous_nodes) + .unwrap_or(&NodePtr::NIL), + ); + } else { + // swap left and right arbitrarily, to avoid + // having a bias because we build the tree depth + // first, until we run out of entropy + let swap = unstructured.arbitrary::() == Ok(true); + op_stack.push(Op::Pair(swap)); + op_stack.push(Op::SubTree); + op_stack.push(Op::SubTree); + sub_trees += 2; + max_pairs -= 1; + max_atoms -= 2; + } + } + Ok(NodeType::Bytes) => { + value_stack.push(match unstructured.arbitrary::>() { + Err(..) => NodePtr::NIL, + Ok(val) => { + let node = a.new_atom(&val).expect("out of memory (atom)"); + previous_nodes.push(node); + node + } + }); + } + Ok(NodeType::U8) => { + value_stack.push(match unstructured.arbitrary::() { + Err(..) => NodePtr::NIL, + Ok(val) => a + .new_small_number(val.into()) + .expect("out of memory (atom)"), + }); + } + Ok(NodeType::U16) => { + value_stack.push(match unstructured.arbitrary::() { + Err(..) => NodePtr::NIL, + Ok(val) => a + .new_small_number(val.into()) + .expect("out of memory (atom)"), + }); + } + Ok(NodeType::U32) => { + value_stack.push(match unstructured.arbitrary::() { + Err(..) => NodePtr::NIL, + Ok(val) => a.new_number(val.into()).expect("out of memory (atom)"), + }); + } + Ok(NodeType::Previous) => { + value_stack.push( + *unstructured + .choose(&previous_nodes) + .unwrap_or(&NodePtr::NIL), + ); + } + } + } + } + } + } + assert_eq!(value_stack.len(), 1); + *value_stack.last().expect("internal error, empty stack") +} diff --git a/fuzz/fuzz_targets/node_eq.rs b/fuzz/fuzz_targets/node_eq.rs index a4c87a2b..c04a1308 100644 --- a/fuzz/fuzz_targets/node_eq.rs +++ b/fuzz/fuzz_targets/node_eq.rs @@ -1,12 +1,24 @@ use clvmr::{Allocator, NodePtr, SExp}; /// compare two CLVM trees. Returns true if they are identical, false otherwise -pub fn node_eq(allocator: &Allocator, s1: NodePtr, s2: NodePtr) -> bool { - match (allocator.sexp(s1), allocator.sexp(s2)) { - (SExp::Pair(s1a, s1b), SExp::Pair(s2a, s2b)) => { - node_eq(allocator, s1a, s2a) && node_eq(allocator, s1b, s2b) +pub fn node_eq(allocator: &Allocator, lhs: NodePtr, rhs: NodePtr) -> bool { + let mut stack = vec![(lhs, rhs)]; + + while let Some((l, r)) = stack.pop() { + match (allocator.sexp(l), allocator.sexp(r)) { + (SExp::Pair(ll, lr), SExp::Pair(rl, rr)) => { + stack.push((lr, rr)); + stack.push((ll, rl)); + } + (SExp::Atom, SExp::Atom) => { + if !allocator.atom_eq(l, r) { + return false; + } + } + _ => { + return false; + } } - (SExp::Atom, SExp::Atom) => allocator.atom_eq(s1, s2), - _ => false, } + true } diff --git a/fuzz/fuzz_targets/object_cache.rs b/fuzz/fuzz_targets/object_cache.rs index 2a2da782..d86efee7 100644 --- a/fuzz/fuzz_targets/object_cache.rs +++ b/fuzz/fuzz_targets/object_cache.rs @@ -1,30 +1,61 @@ #![no_main] mod fuzzing_utils; +mod make_tree; use clvmr::serde::{node_to_bytes, serialized_length, treehash, ObjectCache}; -use clvmr::Allocator; +use clvmr::{Allocator, NodePtr, SExp}; use libfuzzer_sys::fuzz_target; -use fuzzing_utils::{make_tree, tree_hash, visit_tree, BitCursor}; +use fuzzing_utils::{tree_hash, visit_tree}; -fn do_fuzz(data: &[u8], short_atoms: bool) { - let mut cursor = BitCursor::new(data); +enum Op { + Cons, + Traverse(NodePtr), +} + +fn compute_serialized_len(a: &Allocator, n: NodePtr) -> u64 { + let mut stack: Vec = vec![]; + let mut op_stack = vec![Op::Traverse(n)]; + + while let Some(op) = op_stack.pop() { + match op { + Op::Cons => { + let right = stack.pop().expect("internal error, empty stack"); + let left = stack.pop().expect("internal error, empty stack"); + stack.push(1 + left + right); + } + Op::Traverse(n) => match a.sexp(n) { + SExp::Pair(left, right) => { + op_stack.push(Op::Cons); + op_stack.push(Op::Traverse(left)); + op_stack.push(Op::Traverse(right)); + } + SExp::Atom => { + let ser_len = node_to_bytes(a, n) + .expect("internal error, failed to serialize") + .len() as u64; + stack.push(ser_len); + } + }, + } + } + assert_eq!(stack.len(), 1); + *stack.last().expect("internal error, empty stack") +} + +fuzz_target!(|data: &[u8]| { + let mut unstructured = arbitrary::Unstructured::new(data); let mut allocator = Allocator::new(); - let program = make_tree(&mut allocator, &mut cursor, short_atoms); + let program = make_tree::make_tree(&mut allocator, &mut unstructured); let mut hash_cache = ObjectCache::new(treehash); let mut length_cache = ObjectCache::new(serialized_length); visit_tree(&allocator, program, |a, node| { let expect_hash = tree_hash(a, node); - let expect_len = node_to_bytes(a, node).unwrap().len() as u64; + let expect_len = compute_serialized_len(a, node); let computed_hash = hash_cache.get_or_calculate(a, &node, None).unwrap(); let computed_len = length_cache.get_or_calculate(a, &node, None).unwrap(); assert_eq!(computed_hash, &expect_hash); assert_eq!(computed_len, &expect_len); }); -} - -fuzz_target!(|data: &[u8]| { - do_fuzz(data, true); - do_fuzz(data, false); }); diff --git a/fuzz/fuzz_targets/serializer.rs b/fuzz/fuzz_targets/serializer.rs index f116a86a..8d328775 100644 --- a/fuzz/fuzz_targets/serializer.rs +++ b/fuzz/fuzz_targets/serializer.rs @@ -1,6 +1,6 @@ #![no_main] -mod fuzzing_utils; +mod make_tree; mod node_eq; use clvmr::allocator::Allocator; @@ -11,11 +11,10 @@ use libfuzzer_sys::fuzz_target; // serializing with the regular compressed serializer should yield the same // result as using the incremental one (as long as it's in a single add() call). -fn do_fuzz(data: &[u8], short_atoms: bool) { - let mut cursor = fuzzing_utils::BitCursor::new(data); - +fuzz_target!(|data: &[u8]| { + let mut unstructured = arbitrary::Unstructured::new(data); let mut allocator = Allocator::new(); - let program = fuzzing_utils::make_tree(&mut allocator, &mut cursor, short_atoms); + let program = make_tree::make_tree(&mut allocator, &mut unstructured); let b1 = node_to_bytes_backrefs(&allocator, program).unwrap(); @@ -34,9 +33,4 @@ fn do_fuzz(data: &[u8], short_atoms: bool) { } assert_eq!(b1, b2); -} - -fuzz_target!(|data: &[u8]| { - do_fuzz(data, true); - do_fuzz(data, false); });