diff --git a/Cargo.lock b/Cargo.lock index bd9a7fe0f6..ce209ff2a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,6 +126,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arrayvec" version = "0.5.2" @@ -717,6 +726,17 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_more" version = "0.99.20" @@ -1034,6 +1054,14 @@ dependencies = [ "slab", ] +[[package]] +name = "fuzz" +version = "0.0.0" +dependencies = [ + "hugr-model", + "libfuzzer-sys", +] + [[package]] name = "fxhash" version = "0.2.1" @@ -1312,6 +1340,7 @@ dependencies = [ name = "hugr-model" version = "0.24.0" dependencies = [ + "arbitrary", "base64", "bumpalo", "capnp", @@ -1810,6 +1839,16 @@ version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +[[package]] +name = "libfuzzer-sys" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" +dependencies = [ + "arbitrary", + "cc", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2018,6 +2057,7 @@ version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" dependencies = [ + "arbitrary", "num-traits", "rand 0.8.5", "serde", @@ -2947,6 +2987,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9676b89cd56310a87b93dec47b11af744f34d5fc9f367b829474eec0a891350d" dependencies = [ + "arbitrary", "borsh", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 3a6691d184..48a8f924d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,14 +4,15 @@ lto = "thin" [workspace] resolver = "2" members = [ - "hugr", - "hugr-core", - "hugr-passes", - "hugr-cli", - "hugr-model", - "hugr-llvm", - "hugr-py", - "hugr-persistent", + "hugr", + "hugr-core", + "hugr-passes", + "hugr-cli", + "hugr-model", + "hugr-llvm", + "hugr-py", + "hugr-persistent", + "fuzz", ] default-members = ["hugr", "hugr-core", "hugr-passes", "hugr-cli", "hugr-model"] @@ -25,10 +26,10 @@ license = "Apache-2.0" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ - # Set by our CI - 'cfg(ci_run)', - # Set by codecov - 'cfg(coverage,coverage_nightly)', + # Set by our CI + 'cfg(ci_run)', + # Set by codecov + 'cfg(coverage,coverage_nightly)', ] } missing_docs = "warn" diff --git a/devenv.lock b/devenv.lock index f92191e958..6e04a2b586 100644 --- a/devenv.lock +++ b/devenv.lock @@ -55,7 +55,7 @@ "gitignore": { "inputs": { "nixpkgs": [ - "git-hooks", + "pre-commit-hooks", "nixpkgs" ] }, diff --git a/devenv.nix b/devenv.nix index b5fb12dc01..ee17713386 100644 --- a/devenv.nix +++ b/devenv.nix @@ -50,7 +50,7 @@ in # https://devenv.sh/languages/ # https://devenv.sh/reference/options/#languagesrustversion languages.rust = { - channel = "stable"; + channel = "nightly"; enable = true; components = [ "rustc" "cargo" "clippy" "rustfmt" "rust-analyzer" ]; }; diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000000..524b25fcea --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +hugr-model = { path = "../hugr-model/", features = ["arbitrary"] } + +# [dependencies.hugr] +# path = ".." 
+ +[[bin]] +name = "fuzz_random" +path = "fuzz_targets/fuzz_random.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_structure" +path = "fuzz_targets/fuzz_structure.rs" +test = false +doc = false +bench = false diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000000..68b9fc1c48 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,64 @@ +# Fuzz testing + +This project uses `cargo-fuzz` to fuzz test hugr. + +## Prerequisites + +1. Install `cargo-fuzz` with: `cargo install cargo-fuzz` +2. Build with `cargo fuzz build` + +> [!NOTE] +> The `libFuzzer` used by `cargo-fuzz` needs a **nightly** Rust toolchain. + +## Fuzz targets + +You can list the fuzzing targets with: +`cargo fuzz list` + +### Model: Random + +The [fuzz_random](./fuzz_targets/fuzz_random.rs) target uses the coverage-guided +`libFuzzer` fuzzing engine to generate random bytes that we then try to +convert to a package with `hugr_model::v0::ast::Package::from_str()`. + +To run this target: +`cargo fuzz run fuzz_random` + +It is recommended to provide `libFuzzer` with a seed corpus to speed up the +generation of test inputs. For this we can use the fixtures in +`../hugr-model/tests/fixtures` (relative to this directory): +`cargo fuzz run fuzz_random ../hugr-model/tests/fixtures` + +If you want `libFuzzer` to generate inputs containing ASCII characters only: +`cargo fuzz run fuzz_random -- -only_ascii=1` + +### Model: Structure + +The [fuzz_structure](./fuzz_targets/fuzz_structure.rs) target uses `libFuzzer` to do +[structure-aware](https://rust-fuzz.github.io/book/cargo-fuzz/structure-aware-fuzzing.html) +mutations of the `hugr_model::v0::ast::Package` and its members. + +To run this target: +`cargo fuzz run fuzz_structure` + +> [!NOTE] +> This target needs some slight modifications to the `hugr-model` source +> code so the structs and enums can derive the `Arbitrary` implementations +> needed by `libFuzzer`. +> The `arbitrary` features for `ordered-float` and `smol_str` are also needed. 
+ +## Results + +The fuzzing process will be terminated once a crash is detected, and the offending input +will be saved to the `artifacts/TARGET/` directory. You can reproduce the crash by doing: +`cargo fuzz run fuzz_structure artifacts/fuzz_structure/crash-XXXXXX` + +If you want to keep the fuzzing process running, even after a crash has been detected, +you can provide the options `-fork=1` and `-ignore_crashes=1`. + +## Providing options to `libFuzzer` + +You can provide lots of options to `libFuzzer` by doing `cargo fuzz run TARGET -- -flag1=val1 -flag2=val2`. + +To see all the available options: +`cargo fuzz run TARGET -- -help=1` diff --git a/fuzz/fuzz_targets/fuzz_random.rs b/fuzz/fuzz_targets/fuzz_random.rs new file mode 100644 index 0000000000..866e5405f7 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_random.rs @@ -0,0 +1,12 @@ +#![no_main] + +use hugr_model::v0 as model; +use libfuzzer_sys::fuzz_target; +use std::str::FromStr; + +// Only panics matter here: the parse result is discarded, so invalid text is simply rejected. +fuzz_target!(|data: &[u8]| { + if let Ok(s) = std::str::from_utf8(data) { + let _package_ast = model::ast::Package::from_str(s); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_structure.rs b/fuzz/fuzz_targets/fuzz_structure.rs new file mode 100644 index 0000000000..ec0882c56a --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_structure.rs @@ -0,0 +1,20 @@ +#![no_main] + +use hugr_model::v0 as model; +use hugr_model::v0::ast::Package; +use libfuzzer_sys::fuzz_target; +use model::bumpalo::Bump; + +// Round-trip check: a resolved package written to binary and read back must compare equal. +fuzz_target!(|package: Package| { + let bump = Bump::new(); + // Arbitrary ASTs are frequently unresolvable; a resolve error is a rejected + // input, not a crash to report. + let Ok(package) = package.resolve(&bump) else { + return; + }; + let bytes = model::binary::write_to_vec(&package); + let deserialized_package = model::binary::read_from_slice(&bytes, &bump) + .expect("freshly serialized package must deserialize"); + assert_eq!(package, deserialized_package); +}); diff --git a/hugr-model/Cargo.toml b/hugr-model/Cargo.toml index ff53f19c28..a337df972c 100644 --- a/hugr-model/Cargo.toml +++ b/hugr-model/Cargo.toml @@ -22,15 +22,16 @@ capnp = { workspace = true } derive_more = { workspace = true, features = ["display", "error", "from"] } indexmap.workspace = true 
itertools.workspace = true -ordered-float = { workspace = true } +ordered-float = { workspace = true, features = ["arbitrary"] } pest = { workspace = true } pest_derive = { workspace = true } pretty = { workspace = true } rustc-hash.workspace = true semver = { workspace = true } -smol_str = { workspace = true, features = ["serde"] } +smol_str = { workspace = true, features = ["serde", "arbitrary"] } thiserror.workspace = true pyo3 = { workspace = true, optional = true, features = ["extension-module"] } +arbitrary = { version = "1", optional = true, features = ["derive"] } [features] pyo3 = ["dep:pyo3"] diff --git a/hugr-model/src/v0/ast/mod.rs b/hugr-model/src/v0/ast/mod.rs index b6e817b990..4117227526 100644 --- a/hugr-model/src/v0/ast/mod.rs +++ b/hugr-model/src/v0/ast/mod.rs @@ -43,6 +43,7 @@ pub use resolve::ResolveError; /// /// [`table::Package`]: crate::v0::table::Package #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Package { /// The sequence of modules in the package. pub modules: Vec, @@ -70,6 +71,7 @@ impl Package { /// /// [`table::Module`]: crate::v0::table::Module #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Module { /// The root region of the module. /// @@ -103,6 +105,7 @@ impl Module { /// /// [`table::Node`]: crate::v0::table::Node #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Node { /// The operation that the node performs. pub operation: Operation, @@ -129,6 +132,7 @@ pub struct Node { /// /// [`table::Operation`]: crate::v0::table::Operation #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Operation { /// Invalid operation to be used as a placeholder. 
#[default] @@ -193,6 +197,7 @@ impl Operation { /// /// [`table::Symbol`]: crate::v0::table::Symbol #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Symbol { /// The visibility of the symbol. pub visibility: Option, @@ -212,6 +217,7 @@ pub struct Symbol { /// /// [`table::Param`]: crate::v0::table::Param #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Param { /// The name of the parameter. pub name: VarName, @@ -225,6 +231,7 @@ pub struct Param { /// /// [`table::Region`]: crate::v0::table::Region #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Region { /// The kind of the region. See [`RegionKind`] for details. pub kind: RegionKind, @@ -249,6 +256,7 @@ pub struct Region { /// /// [`table::Term`]: crate::v0::table::Term #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Term { /// Standin for any term. #[default] @@ -279,6 +287,7 @@ impl From for Term { /// /// [`table::SeqPart`]: crate::v0::table::SeqPart #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum SeqPart { /// An individual item in the sequence. Item(Term), diff --git a/hugr-model/src/v0/mod.rs b/hugr-model/src/v0/mod.rs index 40a637e413..b3bc5f3978 100644 --- a/hugr-model/src/v0/mod.rs +++ b/hugr-model/src/v0/mod.rs @@ -402,6 +402,7 @@ impl<'py> pyo3::IntoPyObject<'py> for ScopeClosure { /// The kind of a region. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum RegionKind { /// Data flow region. #[default] @@ -447,6 +448,7 @@ impl<'py> pyo3::IntoPyObject<'py> for RegionKind { /// The name of a variable. 
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct VarName(SmolStr); impl VarName { @@ -483,6 +485,7 @@ impl<'py> pyo3::IntoPyObject<'py> for &VarName { /// The name of a symbol. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct SymbolName(SmolStr); impl SymbolName { @@ -508,6 +511,7 @@ impl<'py> pyo3::FromPyObject<'py> for SymbolName { /// The name of a link. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct LinkName(SmolStr); impl LinkName { @@ -555,6 +559,7 @@ impl<'py> pyo3::IntoPyObject<'py> for &LinkName { /// sequences of arbitrary length. To enable cheap cloning and sharing, /// strings and byte sequences use reference counting. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Literal { /// String literal. Str(SmolStr),