Merge pull request #536 from Chia-Network/improve-fuzzer

[CHIA-2071] [CHIA-2130] Improve fuzzer
Chia-Network · Jan 15, 2025 · 775c83d · 775c83d
2 parents d28edba + 26cc4f2
commit 775c83d
Show file tree

Hide file tree

Showing 10 changed files with 227 additions and 126 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -62,6 +62,7 @@ serde_json = "1.0.133"
 clap = "4.5.20"
 rand_chacha = "0.3.1"
 bitvec = "1.0.1"
+arbitrary = { version = "1.4.1", features = ["derive"] }
 
 [dependencies]
 lazy_static = { workspace = true }

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -13,6 +13,7 @@ libfuzzer-sys = { workspace = true }
 clvmr = { workspace = true }
 chia-sha2 = { workspace = true }
 hex = { workspace = true }
+arbitrary = { workspace = true }
 
 [[bin]]
 name = "fuzz_run_program"

diff --git a/fuzz/fuzz_targets/deserialize_br_rand_tree.rs b/fuzz/fuzz_targets/deserialize_br_rand_tree.rs
@@ -1,17 +1,17 @@
 #![no_main]
 
-mod fuzzing_utils;
+mod make_tree;
 
 use clvmr::allocator::Allocator;
 use clvmr::serde::node_from_bytes_backrefs;
 use clvmr::serde::node_to_bytes_backrefs;
 use libfuzzer_sys::fuzz_target;
 
-fn do_fuzz(data: &[u8], short_atoms: bool) {
+fuzz_target!(|data: &[u8]| {
     let mut allocator = Allocator::new();
-    let mut cursor = fuzzing_utils::BitCursor::new(data);
+    let mut unstructured = arbitrary::Unstructured::new(data);
 
-    let program = fuzzing_utils::make_tree(&mut allocator, &mut cursor, short_atoms);
+    let program = make_tree::make_tree(&mut allocator, &mut unstructured);
 
     let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();
 
@@ -22,9 +22,4 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
     if b1 != b2 {
         panic!("b1 and b2 do not match");
     }
-}
-
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
 });
diff --git a/fuzz/fuzz_targets/fuzzing_utils.rs b/fuzz/fuzz_targets/fuzzing_utils.rs
@@ -1,83 +1,6 @@
 use chia_sha2::Sha256;
 use clvmr::allocator::{Allocator, NodePtr, SExp};
 
-pub struct BitCursor<'a> {
-    data: &'a [u8],
-    bit_offset: u8,
-}
-
-fn mask(num: u8) -> u8 {
-    0xff >> num
-}
-
-impl<'a> BitCursor<'a> {
-    pub fn new(data: &'a [u8]) -> Self {
-        BitCursor {
-            data,
-            bit_offset: 0,
-        }
-    }
-
-    pub fn read_bits(&mut self, mut num: u8) -> Option<u8> {
-        assert!(num <= 8);
-        let ret = if self.data.is_empty() {
-            num = 0;
-            None
-        } else if self.bit_offset + num <= 8 {
-            Some((self.data[0] & mask(self.bit_offset)) >> (8 - num - self.bit_offset))
-        } else if self.data.len() < 2 {
-            num = 8 - self.bit_offset;
-            Some(self.data[0] & mask(self.bit_offset))
-        } else {
-            let first_byte = 8 - self.bit_offset;
-            let second_byte = num - first_byte;
-            Some(
-                ((self.data[0] & mask(self.bit_offset)) << second_byte)
-                    | (self.data[1] >> (8 - second_byte)),
-            )
-        };
-        self.advance(num);
-        ret
-    }
-
-    fn advance(&mut self, bits: u8) {
-        let bits = self.bit_offset as u32 + bits as u32;
-        if bits >= 8 {
-            self.data = &self.data[(bits / 8) as usize..];
-        }
-        self.bit_offset = (bits % 8) as u8;
-    }
-}
-
-const BUFFER: [u8; 63] = [
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-];
-
-pub fn make_tree(a: &mut Allocator, cursor: &mut BitCursor, short_atoms: bool) -> NodePtr {
-    match cursor.read_bits(1) {
-        None => a.nil(),
-        Some(0) => {
-            let first = make_tree(a, cursor, short_atoms);
-            let second = make_tree(a, cursor, short_atoms);
-            a.new_pair(first, second).unwrap()
-        }
-        Some(_) => {
-            if short_atoms {
-                match cursor.read_bits(8) {
-                    None => a.nil(),
-                    Some(val) => a.new_atom(&[val]).unwrap(),
-                }
-            } else {
-                match cursor.read_bits(6) {
-                    None => a.nil(),
-                    Some(len) => a.new_atom(&BUFFER[..len as usize]).unwrap(),
-                }
-            }
-        }
-    }
-}
-
 #[allow(dead_code)]
 fn hash_atom(buf: &[u8]) -> [u8; 32] {
     let mut ctx = Sha256::new();

diff --git a/fuzz/fuzz_targets/incremental_serializer.rs b/fuzz/fuzz_targets/incremental_serializer.rs
@@ -1,10 +1,10 @@
 #![no_main]
 
-mod fuzzing_utils;
+mod make_tree;
 
 use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes, Serializer};
 use clvmr::{Allocator, NodePtr, SExp};
-use fuzzing_utils::{make_tree, BitCursor};
+use make_tree::make_tree_limits;
 
 use libfuzzer_sys::fuzz_target;
 
@@ -76,15 +76,18 @@ fn insert_sentinel(
 
 // we ensure that serializing a structure in two steps results in a valid form
 // as well as that it correctly represents the tree.
-fn do_fuzz(data: &[u8], short_atoms: bool) {
-    let mut cursor = BitCursor::new(data);
-
+fuzz_target!(|data: &[u8]| {
+    let mut unstructured = arbitrary::Unstructured::new(data);
     let mut allocator = Allocator::new();
-    let program = make_tree(&mut allocator, &mut cursor, short_atoms);
+
+    // since we copy the tree, we must limit the number of pairs created, to not
+    // exceed the limit of the Allocator
+    let program = make_tree_limits(&mut allocator, &mut unstructured, 10_000_000, 10_000_000);
 
     // this just needs to be a unique NodePtr, that won't appear in the tree
     let sentinel = allocator.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap();
 
+    let checkpoint = allocator.checkpoint();
     // count up until we've used every node as the sentinel/cut-point
     let mut node_idx = 0;
 
@@ -108,10 +111,9 @@ fn do_fuzz(data: &[u8], short_atoms: bool) {
         let b2 = node_to_bytes(&allocator, program).unwrap();
 
         assert_eq!(&hex::encode(&b1), &hex::encode(&b2));
-    }
-}
 
-fuzz_target!(|data: &[u8]| {
-    do_fuzz(data, true);
-    do_fuzz(data, false);
+        // free the memory used by the last iteration from the allocator,
+        // otherwise we'll exceed the Allocator limits eventually
+        allocator.restore_checkpoint(&checkpoint);
+    }
 });
diff --git a/fuzz/fuzz_targets/make_tree.rs b/fuzz/fuzz_targets/make_tree.rs
@@ -0,0 +1,127 @@
+use arbitrary::{Arbitrary, Unstructured};
+use clvmr::{Allocator, NodePtr};
+
+enum Op {
+    Pair(bool),
+    SubTree,
+}
+
+#[derive(Arbitrary)]
+enum NodeType {
+    Pair,
+    Bytes,
+    U8,
+    U16,
+    U32,
+    Previous,
+}
+
+#[allow(dead_code)]
+pub fn make_tree(a: &mut Allocator, unstructured: &mut Unstructured) -> NodePtr {
+    make_tree_limits(a, unstructured, 60_000_000, 60_000_000)
+}
+
+pub fn make_tree_limits(
+    a: &mut Allocator,
+    unstructured: &mut Unstructured,
+    mut max_pairs: i64,
+    mut max_atoms: i64,
+) -> NodePtr {
+    let mut previous_nodes = Vec::<NodePtr>::new();
+    let mut value_stack = Vec::<NodePtr>::new();
+    let mut op_stack = vec![Op::SubTree];
+    // the number of Op::SubTree items on the op_stack
+    let mut sub_trees: i64 = 1;
+
+    while let Some(op) = op_stack.pop() {
+        match op {
+            Op::Pair(swap) => {
+                let left = value_stack.pop().expect("internal error, empty stack");
+                let right = value_stack.pop().expect("internal error, empty stack");
+                let pair = if swap {
+                    a.new_pair(left, right).expect("out of memory (pair)")
+                } else {
+                    a.new_pair(right, left).expect("out of memory (pair)")
+                };
+                value_stack.push(pair);
+                previous_nodes.push(pair);
+            }
+            Op::SubTree => {
+                sub_trees -= 1;
+                if unstructured.is_empty() {
+                    value_stack.push(NodePtr::NIL);
+                } else {
+                    match unstructured.arbitrary::<NodeType>() {
+                        Err(..) => value_stack.push(NodePtr::NIL),
+                        Ok(NodeType::Pair) => {
+                            if sub_trees > unstructured.len() as i64
+                                || max_pairs <= 0
+                                || max_atoms <= 0
+                            {
+                                // there isn't much entropy left, or we've hit the
+                                // pair/atom limit; don't grow the tree anymore
+                                value_stack.push(
+                                    *unstructured
+                                        .choose(&previous_nodes)
+                                        .unwrap_or(&NodePtr::NIL),
+                                );
+                            } else {
+                                // swap left and right arbitrarily, to avoid
+                                // having a bias because we build the tree depth
+                                // first, until we run out of entropy
+                                let swap = unstructured.arbitrary::<bool>() == Ok(true);
+                                op_stack.push(Op::Pair(swap));
+                                op_stack.push(Op::SubTree);
+                                op_stack.push(Op::SubTree);
+                                sub_trees += 2;
+                                max_pairs -= 1;
+                                max_atoms -= 2;
+                            }
+                        }
+                        Ok(NodeType::Bytes) => {
+                            value_stack.push(match unstructured.arbitrary::<Vec<u8>>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => {
+                                    let node = a.new_atom(&val).expect("out of memory (atom)");
+                                    previous_nodes.push(node);
+                                    node
+                                }
+                            });
+                        }
+                        Ok(NodeType::U8) => {
+                            value_stack.push(match unstructured.arbitrary::<u8>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a
+                                    .new_small_number(val.into())
+                                    .expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::U16) => {
+                            value_stack.push(match unstructured.arbitrary::<u16>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a
+                                    .new_small_number(val.into())
+                                    .expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::U32) => {
+                            value_stack.push(match unstructured.arbitrary::<u32>() {
+                                Err(..) => NodePtr::NIL,
+                                Ok(val) => a.new_number(val.into()).expect("out of memory (atom)"),
+                            });
+                        }
+                        Ok(NodeType::Previous) => {
+                            value_stack.push(
+                                *unstructured
+                                    .choose(&previous_nodes)
+                                    .unwrap_or(&NodePtr::NIL),
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    }
+    assert_eq!(value_stack.len(), 1);
+    *value_stack.last().expect("internal error, empty stack")
+}
diff --git a/fuzz/fuzz_targets/node_eq.rs b/fuzz/fuzz_targets/node_eq.rs
@@ -1,12 +1,24 @@
 use clvmr::{Allocator, NodePtr, SExp};
 
 /// compare two CLVM trees. Returns true if they are identical, false otherwise
-pub fn node_eq(allocator: &Allocator, s1: NodePtr, s2: NodePtr) -> bool {
-    match (allocator.sexp(s1), allocator.sexp(s2)) {
-        (SExp::Pair(s1a, s1b), SExp::Pair(s2a, s2b)) => {
-            node_eq(allocator, s1a, s2a) && node_eq(allocator, s1b, s2b)
+pub fn node_eq(allocator: &Allocator, lhs: NodePtr, rhs: NodePtr) -> bool {
+    let mut stack = vec![(lhs, rhs)];
+
+    while let Some((l, r)) = stack.pop() {
+        match (allocator.sexp(l), allocator.sexp(r)) {
+            (SExp::Pair(ll, lr), SExp::Pair(rl, rr)) => {
+                stack.push((lr, rr));
+                stack.push((ll, rl));
+            }
+            (SExp::Atom, SExp::Atom) => {
+                if !allocator.atom_eq(l, r) {
+                    return false;
+                }
+            }
+            _ => {
+                return false;
+            }
         }
-        (SExp::Atom, SExp::Atom) => allocator.atom_eq(s1, s2),
-        _ => false,
     }
+    true
 }