diff --git a/Cargo.toml b/Cargo.toml
index 7aa286c2..99d0e4bd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -51,3 +51,9 @@ path = "benches/unfilter.rs"
 name = "unfilter"
 harness = false
 required-features = ["benchmarks"]
+
+[[bench]]
+path = "benches/expand_paletted.rs"
+name = "expand_paletted"
+harness = false
+required-features = ["benchmarks"]
diff --git a/benches/expand_paletted.rs b/benches/expand_paletted.rs
new file mode 100644
index 00000000..06294368
--- /dev/null
+++ b/benches/expand_paletted.rs
@@ -0,0 +1,167 @@
+//! Usage example:
+//!
+//! ```
+//! $ alias bench="rustup run nightly cargo bench"
+//! $ bench --bench=expand_paletted --features=benchmarks -- --save-baseline my_baseline
+//! ... tweak something ...
+//! $ bench --bench=expand_paletted --features=benchmarks -- --baseline my_baseline
+//! ```
+
+use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use png::benchable_apis::{create_info_from_plte_trns_bitdepth, create_transform_fn, TransformFn};
+use png::{Info, Transformations};
+use rand::Rng;
+use std::fmt::{self, Display};
+
+/// Whether the benchmarked input carries a `tRNS` (transparency) table.
+#[derive(Clone, Copy)]
+enum TrnsPresence {
+    Present,
+    Absent,
+}
+
+impl Display for TrnsPresence {
+    /// Formats the variant as the `trns=yes` / `trns=no` fragment of a benchmark id.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TrnsPresence::Present => write!(f, "trns=yes"),
+            TrnsPresence::Absent => write!(f, "trns=no"),
+        }
+    }
+}
+
+/// Registers every benchmark in this file: palette expansion for each combination of
+/// `tRNS` presence and bit depth, plus construction of the expansion function.
+fn expand_paletted_all(c: &mut Criterion) {
+    let trns_options = [TrnsPresence::Absent, TrnsPresence::Present];
+    let bit_depths = [4, 8];
+
+    let input_size = {
+        let typical_l1_cache_size = 32 * 1024;
+        let mut factor = 1; // input
+        factor += 4; // RGBA output
+        factor += 1; // other data
+        typical_l1_cache_size / factor
+    };
+
+    for trns in trns_options.iter().copied() {
+        for bit_depth in bit_depths.iter().copied() {
+            bench_expand_palette(c, trns, bit_depth, input_size);
+        }
+    }
+
+    bench_create_fn(c, 256, 256); // Full PLTE and trNS
+    bench_create_fn(c, 224, 32); // Partial PLTE and trNS
+    bench_create_fn(c, 16, 1); // Guess: typical for small images?
+}
+
+criterion_group!(benches, expand_paletted_all);
+criterion_main!(benches);
+
+/// Returns a vector of `n` bytes filled from `rng`; panics if filling fails.
+fn get_random_bytes<R: Rng>(rng: &mut R, n: usize) -> Vec<u8> {
+    use rand::Fill;
+    let mut result = vec![0u8; n];
+    result.as_mut_slice().try_fill(rng).unwrap();
+    result
+}
+
+/// Randomly generated palette, optional transparency table and source bytes for one benchmark.
+struct Input {
+    palette: Vec<u8>,
+    trns: Option<Vec<u8>>,
+    src: Vec<u8>,
+    src_bit_depth: u8,
+}
+
+impl Input {
+    fn new(trns: TrnsPresence, src_bit_depth: u8, input_size_in_bytes: usize) -> Self {
+        let mut rng = rand::thread_rng();
+
+        // We provide RGB entries for 192 out of 256 possible indices and Alpha/Transparency
+        // entries for 32 out of 256 possible indices. Rationale for these numbers:
+        // * Oftentimes only a handful of colors at the edges of an icon need transparency
+        // * In general, code needs to handle out-of-bounds indices, so it seems desirable
+        //   to explicitly test this.
+        let palette = get_random_bytes(&mut rng, 192.min(input_size_in_bytes) * 3);
+        let trns = match trns {
+            TrnsPresence::Absent => None,
+            TrnsPresence::Present => Some(get_random_bytes(&mut rng, 32.min(input_size_in_bytes))),
+        };
+        let src = get_random_bytes(&mut rng, input_size_in_bytes);
+
+        Self {
+            palette,
+            trns,
+            src,
+            src_bit_depth,
+        }
+    }
+
+    /// Size of the expanded output: 3 (RGB) or 4 (RGBA) bytes per input sample.
+    fn output_size_in_bytes(&self) -> usize {
+        let output_bytes_per_input_sample = match self.trns {
+            None => 3,
+            Some(_) => 4,
+        };
+        let samples_count_per_byte = (8 / self.src_bit_depth) as usize;
+        let samples_count = self.src.len() * samples_count_per_byte;
+        samples_count * output_bytes_per_input_sample
+    }
+
+    fn to_info(&self) -> Info {
+        create_info_from_plte_trns_bitdepth(&self.palette, self.trns.as_deref(), self.src_bit_depth)
+    }
+}
+
+#[inline(always)]
+fn create_expand_palette_fn(info: &Info) -> TransformFn {
+    create_transform_fn(info, Transformations::EXPAND).unwrap()
+}
+
+/// Benchmarks construction of the expansion function for a palette of `plte_size`
+/// entries and a transparency table of `trns_size` entries.
+fn bench_create_fn(c: &mut Criterion, plte_size: usize, trns_size: usize) {
+    let mut group = c.benchmark_group("expand_paletted(ctor)");
+    group.sample_size(10000);
+
+    let mut rng = rand::thread_rng();
+    // `plte_size` counts palette entries; each entry is an RGB triple (3 bytes).
+    let plte = get_random_bytes(&mut rng, 3 * plte_size);
+    let trns = get_random_bytes(&mut rng, trns_size);
+    let info = create_info_from_plte_trns_bitdepth(&plte, Some(&trns), 8);
+    group.bench_with_input(
+        format!("plte={plte_size}/trns={trns_size:?}"),
+        &info,
+        |b, info| {
+            b.iter(|| create_expand_palette_fn(info));
+        },
+    );
+}
+
+/// Benchmarks running the expansion function over `input_size_in_bytes` bytes of
+/// random source samples.
+fn bench_expand_palette(
+    c: &mut Criterion,
+    trns: TrnsPresence,
+    src_bit_depth: u8,
+    input_size_in_bytes: usize,
+) {
+    let mut group = c.benchmark_group("expand_paletted(exec)");
+
+    let input = Input::new(trns, src_bit_depth, input_size_in_bytes);
+    let transform_fn = create_expand_palette_fn(&input.to_info());
+    group.throughput(Throughput::Bytes(input.output_size_in_bytes() as u64));
+    group.sample_size(500);
+    group.bench_with_input(
+        format!("{trns}/src_bits={src_bit_depth}/src_size={input_size_in_bytes}"),
+        &input,
+        |b, input| {
+            let mut output = vec![0; input.output_size_in_bytes()];
+            let info = input.to_info();
+            b.iter(|| {
+                transform_fn(input.src.as_slice(), output.as_mut_slice(), &info);
+            });
+        },
+    );
+}
diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs
index 0be8134f..17b0b0d6 100644
--- a/src/benchable_apis.rs
+++ b/src/benchable_apis.rs
@@ -3,6 +3,7 @@
 
 use crate::common::BytesPerPixel;
 use crate::filter::FilterType;
+use crate::{BitDepth, ColorType, Info};
 
 /// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only
 /// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`.
@@ -10,3 +11,25 @@ pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8
     let tbpp = BytesPerPixel::from_usize(tbpp as usize);
     crate::filter::unfilter(filter, tbpp, previous, current)
 }
+
+pub use crate::decoder::transform::{create_transform_fn, TransformFn};
+
+/// Builds an [`Info`] for an indexed-color image from a palette (`plte`), an optional
+/// transparency table (`trns`) and a `bit_depth`; all other fields keep their defaults.
+///
+/// # Panics
+///
+/// Panics if `bit_depth` is rejected by [`BitDepth::from_u8`].
+pub fn create_info_from_plte_trns_bitdepth<'a>(
+    plte: &'a [u8],
+    trns: Option<&'a [u8]>,
+    bit_depth: u8,
+) -> Info<'a> {
+    Info {
+        color_type: ColorType::Indexed,
+        bit_depth: BitDepth::from_u8(bit_depth).unwrap(),
+        palette: Some(plte.into()),
+        trns: trns.map(Into::into),
+        ..Info::default()
+    }
+}
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 2e02d703..21d51a66 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -1,5 +1,5 @@
 mod stream;
-mod transform;
+pub(crate) mod transform;
 mod zlib;
 
 pub use self::stream::{DecodeOptions, Decoded, DecodingError, StreamingDecoder};
diff --git a/src/decoder/transform.rs b/src/decoder/transform.rs
index a407d91f..0cbc0740 100644
--- a/src/decoder/transform.rs
+++ b/src/decoder/transform.rs
@@ -245,3 +245,148 @@ pub fn expand_gray_u8_with_trns(row: &[u8], buffer: &mut [u8], info: &Info) {
         chunk[0] = pixel * scaling_factor
     });
 }
+
+#[cfg(test)]
+mod test {
+    use crate::{BitDepth, ColorType, Info, Transformations};
+
+    /// Expands indexed `src` samples through `palette` (and `trns`, if any) with the
+    /// `EXPAND` transformation, returning the resulting RGB or RGBA bytes.
+    fn expand_paletted(
+        src: &[u8],
+        src_bit_depth: u8,
+        palette: &[u8],
+        trns: Option<&[u8]>,
+    ) -> Vec<u8> {
+        let info = Info {
+            color_type: ColorType::Indexed,
+            bit_depth: BitDepth::from_u8(src_bit_depth).unwrap(),
+            palette: Some(palette.into()),
+            trns: trns.map(Into::into),
+            ..Info::default()
+        };
+        let output_bytes_per_input_sample = match trns {
+            None => 3,
+            Some(_) => 4,
+        };
+        let samples_count_per_byte = (8 / src_bit_depth) as usize;
+        let samples_count = src.len() * samples_count_per_byte;
+        let mut dst = vec![0; samples_count * output_bytes_per_input_sample];
+        let transform_fn = super::create_transform_fn(&info, Transformations::EXPAND).unwrap();
+        transform_fn(src, dst.as_mut_slice(), &info);
+        dst
+    }
+
+    #[test]
+    fn test_expand_paletted_rgba_8bit() {
+        let actual = expand_paletted(
+            &[0, 1, 2, 3], // src
+            8,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //    entry #0
+                4, 5, 6, //    entry #1
+                8, 9, 10, //   entry #2
+                12, 13, 14, // entry #3
+            ],
+            Some(&[3, 7, 11, 15]), // trns
+        );
+        assert_eq!(actual, (0..16).collect::<Vec<u8>>());
+    }
+
+    #[test]
+    fn test_expand_paletted_rgb_8bit() {
+        let actual = expand_paletted(
+            &[0, 1, 2, 3], // src
+            8,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //   entry #0
+                3, 4, 5, //   entry #1
+                6, 7, 8, //   entry #2
+                9, 10, 11, // entry #3
+            ],
+            None, // trns
+        );
+        assert_eq!(actual, (0..12).collect::<Vec<u8>>());
+    }
+
+    #[test]
+    fn test_expand_paletted_rgba_4bit() {
+        let actual = expand_paletted(
+            &[0x01, 0x23], // src
+            4,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //    entry #0
+                4, 5, 6, //    entry #1
+                8, 9, 10, //   entry #2
+                12, 13, 14, // entry #3
+            ],
+            Some(&[3, 7, 11, 15]), // trns
+        );
+        assert_eq!(actual, (0..16).collect::<Vec<u8>>());
+    }
+
+    #[test]
+    fn test_expand_paletted_rgb_4bit() {
+        let actual = expand_paletted(
+            &[0x01, 0x23], // src
+            4,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //   entry #0
+                3, 4, 5, //   entry #1
+                6, 7, 8, //   entry #2
+                9, 10, 11, // entry #3
+            ],
+            None, // trns
+        );
+        assert_eq!(actual, (0..12).collect::<Vec<u8>>());
+    }
+
+    #[test]
+    fn test_expand_paletted_rgba_8bit_more_trns_entries_than_palette_entries() {
+        let actual = expand_paletted(
+            &[0, 1, 2, 3], // src
+            8,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //    entry #0
+                4, 5, 6, //    entry #1
+                8, 9, 10, //   entry #2
+                12, 13, 14, // entry #3
+            ],
+            Some(&[123; 5]), // trns
+        );
+
+        // Invalid (too-long) `trns` means that we'll use 0xFF / opaque alpha everywhere.
+        assert_eq!(
+            actual,
+            vec![0, 1, 2, 0xFF, 4, 5, 6, 0xFF, 8, 9, 10, 0xFF, 12, 13, 14, 0xFF],
+        );
+    }
+
+    #[test]
+    fn test_expand_paletted_rgba_8bit_less_trns_entries_than_palette_entries() {
+        let actual = expand_paletted(
+            &[0, 1, 2, 3], // src
+            8,             // src_bit_depth
+            &[
+                // palette
+                0, 1, 2, //    entry #0
+                4, 5, 6, //    entry #1
+                8, 9, 10, //   entry #2
+                12, 13, 14, // entry #3
+            ],
+            Some(&[3, 7]), // trns
+        );
+
+        // Too-short `trns` is treated differently from too-long - only missing entries are
+        // replaced with 0xFF / opaque.
+        assert_eq!(
+            actual,
+            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0xFF, 12, 13, 14, 0xFF],
+        );
+    }
+}