From bd38c540f97fd3b60cd360e3636485c6c76fd95c Mon Sep 17 00:00:00 2001
From: Arvid Norberg <arvid@libtorrent.org>
Date: Tue, 14 Jan 2025 11:04:25 +0100
Subject: [PATCH 1/2] address potential stack overflow in node_eq (in fuzzer)

---
 fuzz/fuzz_targets/node_eq.rs | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/fuzz/fuzz_targets/node_eq.rs b/fuzz/fuzz_targets/node_eq.rs
index a4c87a2b..c04a1308 100644
--- a/fuzz/fuzz_targets/node_eq.rs
+++ b/fuzz/fuzz_targets/node_eq.rs
@@ -1,12 +1,24 @@
 use clvmr::{Allocator, NodePtr, SExp};
 
 /// compare two CLVM trees. Returns true if they are identical, false otherwise
-pub fn node_eq(allocator: &Allocator, s1: NodePtr, s2: NodePtr) -> bool {
-    match (allocator.sexp(s1), allocator.sexp(s2)) {
-        (SExp::Pair(s1a, s1b), SExp::Pair(s2a, s2b)) => {
-            node_eq(allocator, s1a, s2a) && node_eq(allocator, s1b, s2b)
+pub fn node_eq(allocator: &Allocator, lhs: NodePtr, rhs: NodePtr) -> bool {
+    let mut stack = vec![(lhs, rhs)];
+
+    while let Some((l, r)) = stack.pop() {
+        match (allocator.sexp(l), allocator.sexp(r)) {
+            (SExp::Pair(ll, lr), SExp::Pair(rl, rr)) => {
+                stack.push((lr, rr));
+                stack.push((ll, rl));
+            }
+            (SExp::Atom, SExp::Atom) => {
+                if !allocator.atom_eq(l, r) {
+                    return false;
+                }
+            }
+            _ => {
+                return false;
+            }
         }
-        (SExp::Atom, SExp::Atom) => allocator.atom_eq(s1, s2),
-        _ => false,
     }
+    true
 }

From 26cc4f269fd0e48227b3665e35a9f5eacc657433 Mon Sep 17 00:00:00 2001
From: Arvid Norberg <arvid@libtorrent.org>
Date: Tue, 14 Jan 2025 13:16:01 +0100
Subject: [PATCH 2/2] transition make_tree() to using the arbitrary crate. Also
 make it able to generate trees with reused nodes

---
 Cargo.lock                                    |  19 ++-
 Cargo.toml                                    |   1 +
 fuzz/Cargo.toml                               |   1 +
 fuzz/fuzz_targets/deserialize_br_rand_tree.rs |  13 +-
 fuzz/fuzz_targets/fuzzing_utils.rs            |  77 -----------
 fuzz/fuzz_targets/incremental_serializer.rs   |  24 ++--
 fuzz/fuzz_targets/make_tree.rs                | 127 ++++++++++++++++++
 fuzz/fuzz_targets/object_cache.rs             |  53 ++++++--
 fuzz/fuzz_targets/serializer.rs               |  14 +-
 9 files changed, 209 insertions(+), 120 deletions(-)
 create mode 100644 fuzz/fuzz_targets/make_tree.rs

diff --git a/Cargo.lock b/Cargo.lock
index ac884dda..9fa25967 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -74,9 +74,12 @@ dependencies = [
 
 [[package]]
 name = "arbitrary"
-version = "1.3.2"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
+dependencies = [
+ "derive_arbitrary",
+]
 
 [[package]]
 name = "autocfg"
@@ -311,6 +314,7 @@ dependencies = [
 name = "clvm_rs-fuzz"
 version = "1.0.0"
 dependencies = [
+ "arbitrary",
  "chia-sha2",
  "clvmr",
  "hex",
@@ -480,6 +484,17 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "derive_arbitrary"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
diff --git a/Cargo.toml b/Cargo.toml
index 7ee0d497..5768e282 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,6 +62,7 @@ serde_json = "1.0.133"
 clap = "4.5.20"
 rand_chacha = "0.3.1"
 bitvec = "1.0.1"
+arbitrary = { version = "1.4.1", features = ["derive"] }
 
 [dependencies]
 lazy_static = { workspace = true }
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index c794a338..4ebc5daa 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -13,6 +13,7 @@ libfuzzer-sys = { workspace = true }
 clvmr = { workspace = true }
 chia-sha2 = { workspace = true }
 hex = { workspace = true }
+arbitrary = { workspace = true }
 
 [[bin]]
 name = "fuzz_run_program"
diff --git a/fuzz/fuzz_targets/deserialize_br_rand_tree.rs b/fuzz/fuzz_targets/deserialize_br_rand_tree.rs
index 8397b7de..95532218 100644
--- a/fuzz/fuzz_targets/deserialize_br_rand_tree.rs
+++ b/fuzz/fuzz_targets/deserialize_br_rand_tree.rs
@@ -1,17 +1,17 @@
 #![no_main]
 
-mod fuzzing_utils;
+mod make_tree;
 
 use clvmr::allocator::Allocator;
 use clvmr::serde::node_from_bytes_backrefs;
 use clvmr::serde::node_to_bytes_backrefs;
 use libfuzzer_sys::fuzz_target;
 
-fn do_fuzz(data: &[u8], short_atoms: bool) {
+fuzz_target!(|data: &[u8]| {
     let mut allocator = Allocator::new();
-    let mut cursor = fuzzing_utils::BitCursor::new(data);
+    let mut unstructured = arbitrary::Unstructured::new(data);
 
-    let program = fuzzing_utils::make_tree(&mut allocator, &mut cursor, short_atoms);
+    let program = make_tree::make_tree(&mut allocator, &mut unstructured);
 
     let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();
 
@@ -22,9 +22,4 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
     if b1 != b2 {
         panic!("b1 and b2 do not match");
     }
-}
-
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
 });
diff --git a/fuzz/fuzz_targets/fuzzing_utils.rs b/fuzz/fuzz_targets/fuzzing_utils.rs
index 24e30e05..b7656bca 100644
--- a/fuzz/fuzz_targets/fuzzing_utils.rs
+++ b/fuzz/fuzz_targets/fuzzing_utils.rs
@@ -1,83 +1,6 @@
 use chia_sha2::Sha256;
 use clvmr::allocator::{Allocator, NodePtr, SExp};
 
-pub struct BitCursor<'a> {
-    data: &'a [u8],
-    bit_offset: u8,
-}
-
-fn mask(num: u8) -> u8 {
-    0xff >> num
-}
-
-impl<'a> BitCursor<'a> {
-    pub fn new(data: &'a [u8]) -> Self {
-        BitCursor {
-            data,
-            bit_offset: 0,
-        }
-    }
-
-    pub fn read_bits(&mut self, mut num: u8) -> Option<u8> {
-        assert!(num <= 8);
-        let ret = if self.data.is_empty() {
-            num = 0;
-            None
-        } else if self.bit_offset + num <= 8 {
-            Some((self.data[0] & mask(self.bit_offset)) >> (8 - num - self.bit_offset))
-        } else if self.data.len() < 2 {
-            num = 8 - self.bit_offset;
-            Some(self.data[0] & mask(self.bit_offset))
-        } else {
-            let first_byte = 8 - self.bit_offset;
-            let second_byte = num - first_byte;
-            Some(
-                ((self.data[0] & mask(self.bit_offset)) << second_byte)
-                    | (self.data[1] >> (8 - second_byte)),
-            )
-        };
-        self.advance(num);
-        ret
-    }
-
-    fn advance(&mut self, bits: u8) {
-        let bits = self.bit_offset as u32 + bits as u32;
-        if bits >= 8 {
-            self.data = &self.data[(bits / 8) as usize..];
-        }
-        self.bit_offset = (bits % 8) as u8;
-    }
-}
-
-const BUFFER: [u8; 63] = [
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-];
-
-pub fn make_tree(a: &mut Allocator, cursor: &mut BitCursor, short_atoms: bool) -> NodePtr {
-    match cursor.read_bits(1) {
-        None => a.nil(),
-        Some(0) => {
-            let first = make_tree(a, cursor, short_atoms);
-            let second = make_tree(a, cursor, short_atoms);
-            a.new_pair(first, second).unwrap()
-        }
-        Some(_) => {
-            if short_atoms {
-                match cursor.read_bits(8) {
-                    None => a.nil(),
-                    Some(val) => a.new_atom(&[val]).unwrap(),
-                }
-            } else {
-                match cursor.read_bits(6) {
-                    None => a.nil(),
-                    Some(len) => a.new_atom(&BUFFER[..len as usize]).unwrap(),
-                }
-            }
-        }
-    }
-}
-
 #[allow(dead_code)]
 fn hash_atom(buf: &[u8]) -> [u8; 32] {
     let mut ctx = Sha256::new();
diff --git a/fuzz/fuzz_targets/incremental_serializer.rs b/fuzz/fuzz_targets/incremental_serializer.rs
index 5bb980d8..bc6916f1 100644
--- a/fuzz/fuzz_targets/incremental_serializer.rs
+++ b/fuzz/fuzz_targets/incremental_serializer.rs
@@ -1,10 +1,10 @@
 #![no_main]
 
-mod fuzzing_utils;
+mod make_tree;
 
 use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes, Serializer};
 use clvmr::{Allocator, NodePtr, SExp};
-use fuzzing_utils::{make_tree, BitCursor};
+use make_tree::make_tree_limits;
 
 use libfuzzer_sys::fuzz_target;
 
@@ -76,15 +76,18 @@ fn insert_sentinel(
 
 // we ensure that serializing a structure in two steps results in a valid form
 // as well as that it correctly represents the tree.
-fn do_fuzz(data: &[u8], short_atoms: bool) {
-    let mut cursor = BitCursor::new(data);
-
+fuzz_target!(|data: &[u8]| {
+    let mut unstructured = arbitrary::Unstructured::new(data);
     let mut allocator = Allocator::new();
-    let program = make_tree(&mut allocator, &mut cursor, short_atoms);
+
+    // since we copy the tree, we must limit the number of pairs created so
+    // that we don't exceed the Allocator's limit
+    let program = make_tree_limits(&mut allocator, &mut unstructured, 10_000_000, 10_000_000);
 
     // this just needs to be a unique NodePtr, that won't appear in the tree
     let sentinel = allocator.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap();
 
+    let checkpoint = allocator.checkpoint();
     // count up intil we've used every node as the sentinel/cut-point
     let mut node_idx = 0;
 
@@ -108,10 +111,9 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
         let b2 = node_to_bytes(&allocator, program).unwrap();
 
         assert_eq!(&hex::encode(&b1), &hex::encode(&b2));
-    }
-}
 
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
+        // free the memory used by the last iteration from the allocator,
+        // otherwise we'll exceed the Allocator limits eventually
+        allocator.restore_checkpoint(&checkpoint);
+    }
 });
diff --git a/fuzz/fuzz_targets/make_tree.rs b/fuzz/fuzz_targets/make_tree.rs
new file mode 100644
index 00000000..30f0ac5c
--- /dev/null
+++ b/fuzz/fuzz_targets/make_tree.rs
@@ -0,0 +1,127 @@
+use arbitrary::{Arbitrary, Unstructured};
+use clvmr::{Allocator, NodePtr};
+
+enum Op {
+    Pair(bool),
+    SubTree,
+}
+
+#[derive(Arbitrary)]
+enum NodeType {
+    Pair,
+    Bytes,
+    U8,
+    U16,
+    U32,
+    Previous,
+}
+
+#[allow(dead_code)]
+pub fn make_tree(a: &mut Allocator, unstructured: &mut Unstructured) -> NodePtr {
+    make_tree_limits(a, unstructured, 60_000_000, 60_000_000)
+}
+
+pub fn make_tree_limits(
+    a: &mut Allocator,
+    unstructured: &mut Unstructured,
+    mut max_pairs: i64,
+    mut max_atoms: i64,
+) -> NodePtr {
+    let mut previous_nodes = Vec::<NodePtr>::new();
+    let mut value_stack = Vec::<NodePtr>::new();
+    let mut op_stack = vec![Op::SubTree];
+    // the number of Op::SubTree items on the op_stack
+    let mut sub_trees: i64 = 1;
+
+    while let Some(op) = op_stack.pop() {
+        match op {
+            Op::Pair(swap) => {
+                let left = value_stack.pop().expect("internal error, empty stack");
+                let right = value_stack.pop().expect("internal error, empty stack");
+                let pair = if swap {
+                    a.new_pair(left, right).expect("out of memory (pair)")
+                } else {
+                    a.new_pair(right, left).expect("out of memory (pair)")
+                };
+                value_stack.push(pair);
+                previous_nodes.push(pair);
+            }
+            Op::SubTree => {
+                sub_trees -= 1;
+                if unstructured.is_empty() {
+                    value_stack.push(NodePtr::NIL);
+                } else {
+                    match unstructured.arbitrary::<NodeType>() {
+                        Err(..) => value_stack.push(NodePtr::NIL),
+                        Ok(NodeType::Pair) => {
+                            if sub_trees > unstructured.len() as i64
+                                || max_pairs <= 0
+                                || max_atoms <= 0
+                            {
+                                // there isn't much entropy left, don't grow the
+                                // tree anymore
+                                value_stack.push(
+                                    *unstructured
+                                        .choose(&previous_nodes)
+                                        .unwrap_or(&NodePtr::NIL),
+                                );
+                            } else {
+                                // swap left and right arbitrarily, to avoid a
+                                // bias from building the tree depth-first until
+                                // we run out of entropy
+                                let swap = unstructured.arbitrary::<bool>() == Ok(true);
+                                op_stack.push(Op::Pair(swap));
+                                op_stack.push(Op::SubTree);
+                                op_stack.push(Op::SubTree);
+                                sub_trees += 2;
+                                max_pairs -= 1;
+                                max_atoms -= 2;
+                            }
+                        }
+                        Ok(NodeType::Bytes) => {
+                            value_stack.push(match unstructured.arbitrary::<Vec<u8>>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => {
+                                    let node = a.new_atom(&val).expect("out of memory (atom)");
+                                    previous_nodes.push(node);
+                                    node
+                                }
+                            });
+                        }
+                        Ok(NodeType::U8) => {
+                            value_stack.push(match unstructured.arbitrary::<u8>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a
+                                    .new_small_number(val.into())
+                                    .expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::U16) => {
+                            value_stack.push(match unstructured.arbitrary::<u16>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a
+                                    .new_small_number(val.into())
+                                    .expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::U32) => {
+                            value_stack.push(match unstructured.arbitrary::<u32>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a.new_number(val.into()).expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::Previous) => {
+                            value_stack.push(
+                                *unstructured
+                                    .choose(&previous_nodes)
+                                    .unwrap_or(&NodePtr::NIL),
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    }
+    assert_eq!(value_stack.len(), 1);
+    *value_stack.last().expect("internal error, empty stack")
+}
diff --git a/fuzz/fuzz_targets/object_cache.rs b/fuzz/fuzz_targets/object_cache.rs
index 2a2da782..d86efee7 100644
--- a/fuzz/fuzz_targets/object_cache.rs
+++ b/fuzz/fuzz_targets/object_cache.rs
@@ -1,30 +1,61 @@
 #![no_main]
 mod fuzzing_utils;
+mod make_tree;
 
 use clvmr::serde::{node_to_bytes, serialized_length, treehash, ObjectCache};
-use clvmr::Allocator;
+use clvmr::{Allocator, NodePtr, SExp};
 use libfuzzer_sys::fuzz_target;
 
-use fuzzing_utils::{make_tree, tree_hash, visit_tree, BitCursor};
+use fuzzing_utils::{tree_hash, visit_tree};
 
-fn do_fuzz(data: &[u8], short_atoms: bool) {
-    let mut cursor = BitCursor::new(data);
+enum Op {
+    Cons,
+    Traverse(NodePtr),
+}
+
+fn compute_serialized_len(a: &Allocator, n: NodePtr) -> u64 {
+    let mut stack: Vec<u64> = vec![];
+    let mut op_stack = vec![Op::Traverse(n)];
+
+    while let Some(op) = op_stack.pop() {
+        match op {
+            Op::Cons => {
+                let right = stack.pop().expect("internal error, empty stack");
+                let left = stack.pop().expect("internal error, empty stack");
+                stack.push(1 + left + right);
+            }
+            Op::Traverse(n) => match a.sexp(n) {
+                SExp::Pair(left, right) => {
+                    op_stack.push(Op::Cons);
+                    op_stack.push(Op::Traverse(left));
+                    op_stack.push(Op::Traverse(right));
+                }
+                SExp::Atom => {
+                    let ser_len = node_to_bytes(a, n)
+                        .expect("internal error, failed to serialize")
+                        .len() as u64;
+                    stack.push(ser_len);
+                }
+            },
+        }
+    }
+    assert_eq!(stack.len(), 1);
+    *stack.last().expect("internal error, empty stack")
+}
+
+fuzz_target!(|data: &[u8]| {
+    let mut unstructured = arbitrary::Unstructured::new(data);
     let mut allocator = Allocator::new();
-    let program = make_tree(&mut allocator, &mut cursor, short_atoms);
+    let program = make_tree::make_tree(&mut allocator, &mut unstructured);
 
     let mut hash_cache = ObjectCache::new(treehash);
     let mut length_cache = ObjectCache::new(serialized_length);
     visit_tree(&allocator, program, |a, node| {
         let expect_hash = tree_hash(a, node);
-        let expect_len = node_to_bytes(a, node).unwrap().len() as u64;
+        let expect_len = compute_serialized_len(a, node);
         let computed_hash = hash_cache.get_or_calculate(a, &node, None).unwrap();
         let computed_len = length_cache.get_or_calculate(a, &node, None).unwrap();
         assert_eq!(computed_hash, &expect_hash);
         assert_eq!(computed_len, &expect_len);
     });
-}
-
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
 });
diff --git a/fuzz/fuzz_targets/serializer.rs b/fuzz/fuzz_targets/serializer.rs
index f116a86a..8d328775 100644
--- a/fuzz/fuzz_targets/serializer.rs
+++ b/fuzz/fuzz_targets/serializer.rs
@@ -1,6 +1,6 @@
 #![no_main]
 
-mod fuzzing_utils;
+mod make_tree;
 mod node_eq;
 
 use clvmr::allocator::Allocator;
@@ -11,11 +11,10 @@ use libfuzzer_sys::fuzz_target;
 
 // serializing with the regular compressed serializer should yield the same
 // result as using the incremental one (as long as it's in a single add() call).
-fn do_fuzz(data: &[u8], short_atoms: bool) {
-    let mut cursor = fuzzing_utils::BitCursor::new(data);
-
+fuzz_target!(|data: &[u8]| {
+    let mut unstructured = arbitrary::Unstructured::new(data);
     let mut allocator = Allocator::new();
-    let program = fuzzing_utils::make_tree(&mut allocator, &mut cursor, short_atoms);
+    let program = make_tree::make_tree(&mut allocator, &mut unstructured);
 
     let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();
 
@@ -34,9 +33,4 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
     }
 
     assert_eq!(b1, b2);
-}
-
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
 });