Skip to content

Commit b835347

Browse files
authored
Implement Coding Abstraction (#1657)
This adds an abstraction over various coding schemes. There are concrete implementations for both the Reed-Solomon case and the trivial case where no coding is performed at all. Tests and benchmarks that are generic over the scheme are provided.
1 parent a0d076b commit b835347

File tree

18 files changed

+839
-534
lines changed

18 files changed

+839
-534
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ bytes = { version = "1.7.1", default-features = false }
6262
sha2 = { version = "0.10.8", default-features = false }
6363
blake3 = { version = "1.8.2", default-features = false }
6464
rand = { version = "0.8.5", default-features = false }
65+
rand_chacha = { version = "0.3", default-features = false }
6566
rand_core = "0.6.4"
6667
rand_distr = "0.4.3"
6768
futures = "0.3.31"

coding/Cargo.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,22 @@ commonware-storage = { workspace = true, features = ["std"] }
1717
bytes = { workspace = true }
1818
thiserror = { workspace = true }
1919
reed-solomon-simd = "3.0.1"
20+
rand_core = { workspace = true }
2021

2122
[lib]
2223
bench = false
2324

2425
[dev-dependencies]
2526
criterion = { workspace = true }
2627
rand = { workspace = true }
28+
rand_chacha = { workspace = true }
2729

2830
[[bench]]
29-
name = "reed_solomon"
31+
name = "coding_scheme_times"
3032
harness = false
31-
path = "src/reed_solomon/benches/bench.rs"
33+
path = "src/benches/bench.rs"
34+
35+
[[bench]]
36+
name = "coding_scheme_sizes"
37+
harness = false
38+
path = "src/benches/bench_size.rs"

coding/fuzz/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,24 @@ cargo-fuzz = true
1212
commonware-cryptography = { workspace = true }
1313
libfuzzer-sys = { workspace = true }
1414
arbitrary = { workspace = true, features = ["derive"] }
15+
rand_chacha = { workspace = true }
1516

1617
[dependencies.commonware-coding]
1718
path = ".."
1819

20+
[lib]
21+
bench = false
22+
1923
[[bin]]
2024
name = "reed_solomon"
2125
path = "fuzz_targets/reed_solomon.rs"
2226
test = false
2327
doc = false
2428
bench = false
29+
30+
[[bin]]
31+
name = "no_coding"
32+
path = "fuzz_targets/no_coding.rs"
33+
test = false
34+
doc = false
35+
bench = false
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#![no_main]
2+
3+
use commonware_coding::NoCoding;
4+
use commonware_coding_fuzz::{fuzz, FuzzInput};
5+
use commonware_cryptography::Sha256;
6+
use libfuzzer_sys::fuzz_target;
7+
8+
fuzz_target!(|input: FuzzInput| {
9+
fuzz::<NoCoding<Sha256>>(input);
10+
});
Lines changed: 3 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,10 @@
11
#![no_main]
22

3-
use arbitrary::{Arbitrary, Unstructured};
4-
use commonware_coding::reed_solomon::{decode, encode, Chunk};
3+
use commonware_coding::ReedSolomon;
4+
use commonware_coding_fuzz::{fuzz, FuzzInput};
55
use commonware_cryptography::Sha256;
66
use libfuzzer_sys::fuzz_target;
77

8-
#[derive(Debug)]
9-
struct FuzzInput {
10-
total: u16,
11-
min: u16,
12-
data: Vec<u8>,
13-
shuffle_bytes: Vec<u8>,
14-
}
15-
16-
impl<'a> Arbitrary<'a> for FuzzInput {
17-
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
18-
let min = u.int_in_range(1..=u16::MAX - 1)?; // min > 0
19-
let total = u.int_in_range(min + 1..=u16::MAX)?; // total > min
20-
let data_len = u.int_in_range(0..=u32::MAX)?; // data.len() <= u32::MAX
21-
let data = u.bytes(data_len as usize)?.to_vec();
22-
let shuffle_bytes = u.bytes(8)?.to_vec();
23-
24-
Ok(FuzzInput {
25-
total,
26-
min,
27-
data,
28-
shuffle_bytes,
29-
})
30-
}
31-
}
32-
33-
#[derive(Clone)]
34-
pub struct ShuffledChunks {
35-
pub chunks: Vec<Chunk<Sha256>>,
36-
}
37-
38-
impl ShuffledChunks {
39-
pub fn from_chunks<I>(chunks: I, fuzz_bytes: &[u8]) -> arbitrary::Result<Self>
40-
where
41-
I: IntoIterator<Item = Chunk<Sha256>>,
42-
{
43-
let mut chunks: Vec<_> = chunks.into_iter().collect();
44-
let mut u = Unstructured::new(fuzz_bytes);
45-
46-
for i in (1..chunks.len()).rev() {
47-
let j = u.int_in_range(0..=i)?;
48-
chunks.swap(i, j);
49-
}
50-
51-
Ok(ShuffledChunks { chunks })
52-
}
53-
}
54-
55-
fn fuzz(input: FuzzInput) {
56-
let total = input.total;
57-
let min = input.min;
58-
let data = input.data;
59-
let shuffle_bytes = input.shuffle_bytes;
60-
61-
// if encode returns Digest then we should be able to decode it later.
62-
// we return in Error case, because the underlying library can panic on arbitrary input.
63-
let (root, chunks) = match encode::<Sha256>(total, min, data.to_vec()) {
64-
Ok(result) => result,
65-
Err(_) => return,
66-
};
67-
68-
assert_eq!(chunks.len(), total as usize);
69-
70-
for (i, chunk) in chunks.iter().enumerate() {
71-
assert!(chunk.verify(i as u16, &root), "failed to verify chunk");
72-
}
73-
74-
let decoded = match decode::<Sha256>(total, min, &root, chunks.clone()) {
75-
Ok(data) => data,
76-
Err(e) => panic!("decode with all chunks failed: {e:?}"),
77-
};
78-
assert_eq!(decoded, data, "decode with all chunks failed");
79-
80-
let subset =
81-
ShuffledChunks::from_chunks(chunks, &shuffle_bytes).expect("failed to shuffle chunks");
82-
83-
let decoded_subset = match decode::<Sha256>(total, min, &root, subset.chunks) {
84-
Ok(data) => data,
85-
Err(e) => panic!("decode with min chunks failed: {e:?}"),
86-
};
87-
assert_eq!(decoded_subset, data);
88-
}
89-
908
fuzz_target!(|input: FuzzInput| {
91-
fuzz(input);
9+
fuzz::<ReedSolomon<Sha256>>(input);
9210
});

coding/fuzz/src/lib.rs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
use arbitrary::{Arbitrary, Unstructured};
2+
use commonware_coding::{Config, Scheme};
3+
4+
#[derive(Debug)]
5+
pub struct FuzzInput {
6+
min: u16,
7+
recovery: u16,
8+
to_use: u16,
9+
data: Vec<u8>,
10+
shuffle_bytes: Vec<u8>,
11+
}
12+
13+
impl<'a> Arbitrary<'a> for FuzzInput {
14+
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
15+
// We need to generate parameters that satisfy the conditions of valid RS coding:
16+
// for the chosen n, min <= 2^16 - 2^n and recovery <= 2^n.
17+
let n: u64 = u.int_in_range(0..=15)?;
18+
let mut min = u.int_in_range(1..=u16::try_from((1 << 16) - (1 << n)).unwrap())?;
19+
let mut recovery = u.int_in_range(1..=u16::try_from(1 << n).unwrap())?;
20+
// Correction to make sure that we can fit min + recovery in a u16.
21+
if min.checked_add(recovery).is_none() {
22+
if recovery > 1 {
23+
recovery -= 1;
24+
} else {
25+
min -= 1;
26+
}
27+
}
28+
let to_use = u.int_in_range(min..=min + recovery)?;
29+
let data_len = u.int_in_range(0..=u32::MAX)?; // data.len() <= u32::MAX
30+
let data = u.bytes(data_len as usize)?.to_vec();
31+
let shuffle_bytes = u.bytes(8)?.to_vec();
32+
33+
Ok(FuzzInput {
34+
recovery,
35+
min,
36+
to_use,
37+
data,
38+
shuffle_bytes,
39+
})
40+
}
41+
}
42+
43+
fn shuffle<T>(shuffle_bytes: &[u8], data: &mut [T]) {
44+
let mut u = Unstructured::new(shuffle_bytes);
45+
46+
for i in (1..data.len()).rev() {
47+
let j = u.int_in_range(0..=i).unwrap();
48+
data.swap(i, j);
49+
}
50+
}
51+
52+
pub fn fuzz<S: Scheme>(input: FuzzInput) {
53+
let FuzzInput {
54+
recovery,
55+
min,
56+
to_use,
57+
data,
58+
shuffle_bytes,
59+
} = input;
60+
61+
let config = Config {
62+
minimum_shards: min,
63+
extra_shards: recovery,
64+
};
65+
let (commitment, mut shards) = S::encode(&config, data.as_slice()).unwrap();
66+
assert_eq!(shards.len(), (recovery + min) as usize);
67+
// Each participant checks their shard
68+
let mut reshards = shards
69+
.iter()
70+
.map(|(shard, proof)| S::check(&commitment, proof, shard).unwrap())
71+
.collect::<Vec<_>>();
72+
// The last shard is "ours"
73+
let (my_shard, _) = shards.pop().unwrap();
74+
// We shuffle the remaining reshards
75+
reshards.truncate(reshards.len() - 1);
76+
shuffle(&shuffle_bytes, &mut reshards);
77+
// We decode using the specified number of reshards, and ours.
78+
let decoded = S::decode(
79+
&config,
80+
&commitment,
81+
my_shard,
82+
&reshards[..(to_use - 1) as usize],
83+
)
84+
.unwrap();
85+
assert_eq!(&decoded, &data);
86+
}

coding/src/benches/bench.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use commonware_coding::{Config, Scheme};
2+
use criterion::{criterion_main, BatchSize, Criterion};
3+
use rand::{seq::SliceRandom, RngCore, SeedableRng as _};
4+
use rand_chacha::ChaCha8Rng;
5+
6+
mod no_coding;
7+
mod reed_solomon;
8+
9+
pub(crate) fn benchmark_encode_generic<S: Scheme>(name: &str, c: &mut Criterion) {
10+
let mut rng = ChaCha8Rng::seed_from_u64(0);
11+
let cases = [8, 12, 16, 19, 20, 24].map(|i| 2usize.pow(i));
12+
for data_length in cases.into_iter() {
13+
for chunks in [10, 25, 50, 100, 250] {
14+
let min = chunks / 3;
15+
let config = Config {
16+
minimum_shards: min as u16,
17+
extra_shards: (chunks - min) as u16,
18+
};
19+
c.bench_function(
20+
&format!("{}/msg_len={} chunks={}", name, data_length, chunks),
21+
|b| {
22+
b.iter_batched(
23+
|| {
24+
// Generate random data
25+
let mut data = vec![0u8; data_length];
26+
rng.fill_bytes(&mut data);
27+
data
28+
},
29+
|data| S::encode(&config, data.as_slice()),
30+
BatchSize::SmallInput,
31+
);
32+
},
33+
);
34+
}
35+
}
36+
}
37+
38+
pub(crate) fn benchmark_decode_generic<S: Scheme>(name: &str, c: &mut Criterion) {
39+
let mut rng = ChaCha8Rng::seed_from_u64(0);
40+
let cases = [8, 12, 16, 19, 20, 24].map(|i| 2usize.pow(i));
41+
for data_length in cases.into_iter() {
42+
for chunks in [10, 25, 50, 100, 250] {
43+
let min = chunks / 3;
44+
let config = Config {
45+
minimum_shards: min as u16,
46+
extra_shards: (chunks - min) as u16,
47+
};
48+
c.bench_function(
49+
&format!("{}/msg_len={} chunks={}", name, data_length, chunks),
50+
|b| {
51+
b.iter_batched(
52+
|| {
53+
// Generate random data
54+
let mut data = vec![0u8; data_length];
55+
rng.fill_bytes(&mut data);
56+
57+
// Encode data
58+
let (commitment, mut shards) =
59+
S::encode(&config, data.as_slice()).unwrap();
60+
61+
shards.shuffle(&mut rng);
62+
let my_shard_and_proof = shards.pop().unwrap();
63+
let reshards = shards
64+
.iter()
65+
.take(min)
66+
.map(|(shard, proof)| S::check(&commitment, proof, shard).unwrap())
67+
.collect::<Vec<_>>();
68+
69+
(commitment, my_shard_and_proof, reshards)
70+
},
71+
// We include the cost of checking your shard as part of decoding
72+
|(commitment, (my_shard, my_proof), reshards)| {
73+
S::check(&commitment, &my_proof, &my_shard).unwrap();
74+
S::decode(&config, &commitment, my_shard, &reshards).unwrap();
75+
},
76+
BatchSize::SmallInput,
77+
);
78+
},
79+
);
80+
}
81+
}
82+
}
83+
84+
criterion_main!(reed_solomon::benches, no_coding::benches);

0 commit comments

Comments
 (0)