Skip to content

Commit

Permalink
Memoize which row transformation function should be used.
Browse files Browse the repository at this point in the history
Instead of deciding which function to use for every row, memoize and
reuse the first decision.

One desirable outcome of this commit is making the public API of the
`transform.rs` module quite thin (just the `TransformFn` type alias and
the `create_transform_fn` function) - this makes this functionality
easier to test and benchmark.

Another desirable outcome is a small runtime improvement in most
benchmarks (compared to the baseline just before the commit that
introduces `transform.rs`):

decode/paletted-zune.png: [-8.7989% -7.4940% -6.1466%] (p = 0.00 < 0.05)
decode/kodim02.png: [-4.4824% -4.0883% -3.6232%] (p = 0.00 < 0.05)
decode/Transparency.png: [-4.5886% -3.5213% -2.2121%] (p = 0.00 < 0.05)
decode/kodim17.png: [-2.4406% -2.0663% -1.7093%] (p = 0.00 < 0.05)
decode/kodim07.png: [-3.4461% -2.8264% -2.2676%] (p = 0.00 < 0.05)
decode/kodim23.png: [-1.7490% -1.3101% -0.7639%] (p = 0.00 < 0.05)
decode/Lohengrin: [-2.9387% -2.3664% -1.7545%] (p = 0.00 < 0.05)
generated-noncompressed-4k-idat/8x8.png: [-4.0353% -3.5931% -3.1529%] (p = 0.00 < 0.05)
generated-noncompressed-4k-idat/128x128.png: [-5.2607% -4.6452% -4.0279%] (p = 0.00 < 0.05)
generated-noncompressed-4k-idat/2048x2048.png: [-3.0347% -1.7376% -0.4028%] (p = 0.03 < 0.05)
generated-noncompressed-64k-idat/128x128.png: [-2.3769% -1.7924% -1.2211%] (p = 0.00 < 0.05)
generated-noncompressed-64k-idat/2048x2048.png: [-12.113% -9.8099% -7.2633%] (p = 0.00 < 0.05)
generated-noncompressed-64k-idat/12288x12288.png: [-5.0077% -1.4750% +1.4708%] (p = 0.43 > 0.05)
generated-noncompressed-2g-idat/12288x12288.png: [-9.1860% -8.2857% -7.3934%] (p = 0.00 < 0.05)

Some regressions were observed in 2 benchmarks:

generated-noncompressed-4k-idat/12288x12288.png:
[+2.5010% +3.1616% +3.8445%] (p = 0.00 < 0.05)
[+3.6046% +4.6592% +5.8580%] (p = 0.00 < 0.05)
[+4.6484% +5.4718% +6.4193%] (p = 0.00 < 0.05)

generated-noncompressed-2g-idat/2048x2048.png:
[-0.6455% +1.9676% +3.9191%] (p = 0.13 > 0.05)
[+6.7491% +8.4227% +10.791%] (p = 0.00 < 0.05)
[+5.9926% +7.2249% +8.5428%] (p = 0.00 < 0.05)
  • Loading branch information
anforowicz authored and fintelia committed Jan 27, 2024
1 parent 64970c6 commit 2c65362
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 62 deletions.
72 changes: 11 additions & 61 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod zlib;

pub use self::stream::{DecodeOptions, Decoded, DecodingError, StreamingDecoder};
use self::stream::{FormatErrorInner, CHUNCK_BUFFER_SIZE};
use self::transform::*;
use self::transform::{create_transform_fn, TransformFn};

use std::io::{BufRead, BufReader, Read};
use std::mem;
Expand Down Expand Up @@ -229,6 +229,7 @@ impl<R: Read> Decoder<R> {
prev_start: 0,
current_start: 0,
transform: self.transform,
transform_fn: None,
scratch_buffer: Vec::new(),
};

Expand Down Expand Up @@ -369,6 +370,9 @@ pub struct Reader<R: Read> {
current_start: usize,
/// Output transformations
transform: Transformations,
/// Function that can transform decompressed, unfiltered rows into final output.
/// See the `transform.rs` module for more details.
transform_fn: Option<TransformFn>,
/// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
/// to a byte slice. In a future version of this library, this buffer will be removed and
/// `next_row` and `next_interlaced_row` will write directly into a user provided output buffer.
Expand Down Expand Up @@ -630,67 +634,13 @@ impl<R: Read> Reader<R> {
let row = &self.data_stream[self.prev_start..self.current_start];

// Apply transformations and write resulting data to buffer.
let (color_type, bit_depth, trns) = {
let info = self.info();
(
info.color_type,
info.bit_depth as u8,
info.trns.is_some() || self.transform.contains(Transformations::ALPHA),
)
};
let expand = self.transform.contains(Transformations::EXPAND)
|| self.transform.contains(Transformations::ALPHA);
let strip16 = bit_depth == 16 && self.transform.contains(Transformations::STRIP_16);
let info = self.decoder.info().unwrap();
match color_type {
ColorType::Indexed if expand => {
if info.palette.is_none() {
return Err(DecodingError::Format(
FormatErrorInner::PaletteRequired.into(),
));
} else if let BitDepth::Sixteen = info.bit_depth {
// This should have been caught earlier but let's check again. Can't hurt.
return Err(DecodingError::Format(
FormatErrorInner::InvalidColorBitDepth {
color_type: ColorType::Indexed,
bit_depth: BitDepth::Sixteen,
}
.into(),
));
} else {
if trns {
expand_paletted_into_rgba8(row, output_buffer, info);
} else {
expand_paletted_into_rgb8(row, output_buffer, info);
}
}
let transform_fn = {
if self.transform_fn.is_none() {
self.transform_fn = Some(create_transform_fn(self.info(), self.transform)?);
}
ColorType::Grayscale | ColorType::GrayscaleAlpha if bit_depth < 8 && expand => {
if trns {
expand_gray_u8_with_trns(row, output_buffer, info)
} else {
expand_gray_u8(row, output_buffer, info)
}
}
ColorType::Grayscale | ColorType::Rgb if expand && trns => {
if bit_depth == 8 {
expand_trns_line(row, output_buffer, info);
} else if strip16 {
expand_trns_and_strip_line16(row, output_buffer, info);
} else {
assert_eq!(bit_depth, 16);
expand_trns_line16(row, output_buffer, info);
}
}
ColorType::Grayscale | ColorType::GrayscaleAlpha | ColorType::Rgb | ColorType::Rgba
if strip16 =>
{
for i in 0..row.len() / 2 {
output_buffer[i] = row[2 * i];
}
}
_ => output_buffer.copy_from_slice(row),
}
self.transform_fn.unwrap()
};
transform_fn(row, output_buffer, self.info());

Ok(())
}
Expand Down
84 changes: 83 additions & 1 deletion src/decoder/transform.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,88 @@
//! Transforming a decompressed, unfiltered row into the final output.
use crate::Info;
use crate::{BitDepth, ColorType, DecodingError, Info, Transformations};

use super::stream::FormatErrorInner;

/// Type of a function that can transform a decompressed, unfiltered row (the
/// 1st argument) into the final pixels (the 2nd argument), optionally using
/// image metadata (e.g. PLTE data can be accessed using the 3rd argument).
///
/// TODO: If some precomputed state is needed (e.g. to make `expand_paletted...`
/// faster) then consider changing this into `Box<dyn Fn(...)>`.
pub type TransformFn = fn(&[u8], &mut [u8], &Info);

/// Returns a transformation function that should be applied to image rows based
/// on 1) decoded image metadata (`info`) and 2) the transformations requested
/// by the crate client (`transform`).
pub fn create_transform_fn(
info: &Info,
transform: Transformations,
) -> Result<TransformFn, DecodingError> {
let color_type = info.color_type;
let bit_depth = info.bit_depth as u8;
let trns = info.trns.is_some() || transform.contains(Transformations::ALPHA);
let expand =
transform.contains(Transformations::EXPAND) || transform.contains(Transformations::ALPHA);
let strip16 = bit_depth == 16 && transform.contains(Transformations::STRIP_16);
match color_type {
ColorType::Indexed if expand => {
if info.palette.is_none() {
return Err(DecodingError::Format(
FormatErrorInner::PaletteRequired.into(),
));
} else if let BitDepth::Sixteen = info.bit_depth {
// This should have been caught earlier but let's check again. Can't hurt.
return Err(DecodingError::Format(
FormatErrorInner::InvalidColorBitDepth {
color_type: ColorType::Indexed,
bit_depth: BitDepth::Sixteen,
}
.into(),
));
} else {
if trns {
Ok(expand_paletted_into_rgba8)
} else {
Ok(expand_paletted_into_rgb8)
}
}
}
ColorType::Grayscale | ColorType::GrayscaleAlpha if bit_depth < 8 && expand => {
if trns {
Ok(expand_gray_u8_with_trns)
} else {
Ok(expand_gray_u8)
}
}
ColorType::Grayscale | ColorType::Rgb if expand && trns => {
if bit_depth == 8 {
Ok(expand_trns_line)
} else if strip16 {
Ok(expand_trns_and_strip_line16)
} else {
assert_eq!(bit_depth, 16);
Ok(expand_trns_line16)
}
}
ColorType::Grayscale | ColorType::GrayscaleAlpha | ColorType::Rgb | ColorType::Rgba
if strip16 =>
{
Ok(transform_row_strip16)
}
_ => Ok(copy_row),
}
}

fn copy_row(row: &[u8], output_buffer: &mut [u8], _: &Info) {
output_buffer.copy_from_slice(row);
}

fn transform_row_strip16(row: &[u8], output_buffer: &mut [u8], _: &Info) {
for i in 0..row.len() / 2 {
output_buffer[i] = row[2 * i];
}
}

#[inline(always)]
fn unpack_bits<F>(input: &[u8], output: &mut [u8], channels: usize, bit_depth: u8, func: F)
Expand Down

0 comments on commit 2c65362

Please sign in to comment.