Memoize which row transformation function should be used.

Instead of deciding which function to use for every row, memoize and reuse the first decision. One desirable outcome of this commit is making the public API of the `transform.rs` module quite thin (just the `TransformFn` type alias and the `create_transform_fn` function) - this makes this functionality easier to test and benchmark. Another desirable outcome is a small runtime improvement in most benchmarks (compared to the baseline just before the commit that introduces `transform.rs`): decode/paletted-zune.png: [-8.7989% -7.4940% -6.1466%] (p = 0.00 < 0.05) decode/kodim02.png: [-4.4824% -4.0883% -3.6232%] (p = 0.00 < 0.05) decode/Transparency.png: [-4.5886% -3.5213% -2.2121%] (p = 0.00 < 0.05) decode/kodim17.png: [-2.4406% -2.0663% -1.7093%] (p = 0.00 < 0.05) decode/kodim07.png: [-3.4461% -2.8264% -2.2676%] (p = 0.00 < 0.05) decode/kodim23.png: [-1.7490% -1.3101% -0.7639%] (p = 0.00 < 0.05) decode/Lohengrin: [-2.9387% -2.3664% -1.7545%] (p = 0.00 < 0.05) generated-noncompressed-4k-idat/8x8.png: [-4.0353% -3.5931% -3.1529%] (p = 0.00 < 0.05) generated-noncompressed-4k-idat/128x128.png: [-5.2607% -4.6452% -4.0279%] (p = 0.00 < 0.05) generated-noncompressed-4k-idat/2048x2048.png: [-3.0347% -1.7376% -0.4028%] (p = 0.03 < 0.05) generated-noncompressed-64k-idat/128x128.png: [-2.3769% -1.7924% -1.2211%] (p = 0.00 < 0.05) generated-noncompressed-64k-idat/2048x2048.png: [-12.113% -9.8099% -7.2633%] (p = 0.00 < 0.05) generated-noncompressed-64k-idat/12288x12288.png: [-5.0077% -1.4750% +1.4708%] (p = 0.43 > 0.05) generated-noncompressed-2g-idat/12288x12288.png: [-9.1860% -8.2857% -7.3934%] (p = 0.00 < 0.05) Some regressions were observed in 2 benchmarks: generated-noncompressed-4k-idat/12288x12288.png: [+2.5010% +3.1616% +3.8445%] (p = 0.00 < 0.05) [+3.6046% +4.6592% +5.8580%] (p = 0.00 < 0.05) [+4.6484% +5.4718% +6.4193%] (p = 0.00 < 0.05) generated-noncompressed-2g-idat/2048x2048.png: [-0.6455% +1.9676% +3.9191%] (p = 0.13 > 0.05) [+6.7491% +8.4227% +10.791%] (p = 0.00 < 0.05) [+5.9926% +7.2249% +8.5428%] (p = 0.00 < 0.05)
image-rs · Jan 27, 2024 · 2c65362 · 2c65362
1 parent 64970c6
commit 2c65362
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 62 deletions.
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
@@ -4,7 +4,7 @@ mod zlib;
 
 pub use self::stream::{DecodeOptions, Decoded, DecodingError, StreamingDecoder};
 use self::stream::{FormatErrorInner, CHUNCK_BUFFER_SIZE};
-use self::transform::*;
+use self::transform::{create_transform_fn, TransformFn};
 
 use std::io::{BufRead, BufReader, Read};
 use std::mem;
@@ -229,6 +229,7 @@ impl<R: Read> Decoder<R> {
             prev_start: 0,
             current_start: 0,
             transform: self.transform,
+            transform_fn: None,
             scratch_buffer: Vec::new(),
         };
 
@@ -369,6 +370,9 @@ pub struct Reader<R: Read> {
     current_start: usize,
     /// Output transformations
     transform: Transformations,
+    /// Function that can transform decompressed, unfiltered rows into final output.
+    /// See the `transform.rs` module for more details.
+    transform_fn: Option<TransformFn>,
     /// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
     /// to a byte slice. In a future version of this library, this buffer will be removed and
     /// `next_row` and `next_interlaced_row` will write directly into a user provided output buffer.
@@ -630,67 +634,13 @@ impl<R: Read> Reader<R> {
         let row = &self.data_stream[self.prev_start..self.current_start];
 
         // Apply transformations and write resulting data to buffer.
-        let (color_type, bit_depth, trns) = {
-            let info = self.info();
-            (
-                info.color_type,
-                info.bit_depth as u8,
-                info.trns.is_some() || self.transform.contains(Transformations::ALPHA),
-            )
-        };
-        let expand = self.transform.contains(Transformations::EXPAND)
-            || self.transform.contains(Transformations::ALPHA);
-        let strip16 = bit_depth == 16 && self.transform.contains(Transformations::STRIP_16);
-        let info = self.decoder.info().unwrap();
-        match color_type {
-            ColorType::Indexed if expand => {
-                if info.palette.is_none() {
-                    return Err(DecodingError::Format(
-                        FormatErrorInner::PaletteRequired.into(),
-                    ));
-                } else if let BitDepth::Sixteen = info.bit_depth {
-                    // This should have been caught earlier but let's check again. Can't hurt.
-                    return Err(DecodingError::Format(
-                        FormatErrorInner::InvalidColorBitDepth {
-                            color_type: ColorType::Indexed,
-                            bit_depth: BitDepth::Sixteen,
-                        }
-                        .into(),
-                    ));
-                } else {
-                    if trns {
-                        expand_paletted_into_rgba8(row, output_buffer, info);
-                    } else {
-                        expand_paletted_into_rgb8(row, output_buffer, info);
-                    }
-                }
+        let transform_fn = {
+            if self.transform_fn.is_none() {
+                self.transform_fn = Some(create_transform_fn(self.info(), self.transform)?);
             }
-            ColorType::Grayscale | ColorType::GrayscaleAlpha if bit_depth < 8 && expand => {
-                if trns {
-                    expand_gray_u8_with_trns(row, output_buffer, info)
-                } else {
-                    expand_gray_u8(row, output_buffer, info)
-                }
-            }
-            ColorType::Grayscale | ColorType::Rgb if expand && trns => {
-                if bit_depth == 8 {
-                    expand_trns_line(row, output_buffer, info);
-                } else if strip16 {
-                    expand_trns_and_strip_line16(row, output_buffer, info);
-                } else {
-                    assert_eq!(bit_depth, 16);
-                    expand_trns_line16(row, output_buffer, info);
-                }
-            }
-            ColorType::Grayscale | ColorType::GrayscaleAlpha | ColorType::Rgb | ColorType::Rgba
-                if strip16 =>
-            {
-                for i in 0..row.len() / 2 {
-                    output_buffer[i] = row[2 * i];
-                }
-            }
-            _ => output_buffer.copy_from_slice(row),
-        }
+            self.transform_fn.unwrap()
+        };
+        transform_fn(row, output_buffer, self.info());
 
         Ok(())
     }

diff --git a/src/decoder/transform.rs b/src/decoder/transform.rs
@@ -1,6 +1,88 @@
 //! Transforming a decompressed, unfiltered row into the final output.
 
-use crate::Info;
+use crate::{BitDepth, ColorType, DecodingError, Info, Transformations};
+
+use super::stream::FormatErrorInner;
+
+/// Type of a function that can transform a decompressed, unfiltered row (the
+/// 1st argument) into the final pixels (the 2nd argument), optionally using
+/// image metadata (e.g. PLTE data can be accessed using the 3rd argument).
+///
+/// TODO: If some precomputed state is needed (e.g. to make `expand_paletted...`
+/// faster) then consider changing this into `Box<dyn Fn(...)>`.
+pub type TransformFn = fn(&[u8], &mut [u8], &Info);
+
+/// Returns a transformation function that should be applied to image rows based
+/// on 1) decoded image metadata (`info`) and 2) the transformations requested
+/// by the crate client (`transform`).
+pub fn create_transform_fn(
+    info: &Info,
+    transform: Transformations,
+) -> Result<TransformFn, DecodingError> {
+    let color_type = info.color_type;
+    let bit_depth = info.bit_depth as u8;
+    let trns = info.trns.is_some() || transform.contains(Transformations::ALPHA);
+    let expand =
+        transform.contains(Transformations::EXPAND) || transform.contains(Transformations::ALPHA);
+    let strip16 = bit_depth == 16 && transform.contains(Transformations::STRIP_16);
+    match color_type {
+        ColorType::Indexed if expand => {
+            if info.palette.is_none() {
+                return Err(DecodingError::Format(
+                    FormatErrorInner::PaletteRequired.into(),
+                ));
+            } else if let BitDepth::Sixteen = info.bit_depth {
+                // This should have been caught earlier but let's check again. Can't hurt.
+                return Err(DecodingError::Format(
+                    FormatErrorInner::InvalidColorBitDepth {
+                        color_type: ColorType::Indexed,
+                        bit_depth: BitDepth::Sixteen,
+                    }
+                    .into(),
+                ));
+            } else {
+                if trns {
+                    Ok(expand_paletted_into_rgba8)
+                } else {
+                    Ok(expand_paletted_into_rgb8)
+                }
+            }
+        }
+        ColorType::Grayscale | ColorType::GrayscaleAlpha if bit_depth < 8 && expand => {
+            if trns {
+                Ok(expand_gray_u8_with_trns)
+            } else {
+                Ok(expand_gray_u8)
+            }
+        }
+        ColorType::Grayscale | ColorType::Rgb if expand && trns => {
+            if bit_depth == 8 {
+                Ok(expand_trns_line)
+            } else if strip16 {
+                Ok(expand_trns_and_strip_line16)
+            } else {
+                assert_eq!(bit_depth, 16);
+                Ok(expand_trns_line16)
+            }
+        }
+        ColorType::Grayscale | ColorType::GrayscaleAlpha | ColorType::Rgb | ColorType::Rgba
+            if strip16 =>
+        {
+            Ok(transform_row_strip16)
+        }
+        _ => Ok(copy_row),
+    }
+}
+
+fn copy_row(row: &[u8], output_buffer: &mut [u8], _: &Info) {
+    output_buffer.copy_from_slice(row);
+}
+
+fn transform_row_strip16(row: &[u8], output_buffer: &mut [u8], _: &Info) {
+    for i in 0..row.len() / 2 {
+        output_buffer[i] = row[2 * i];
+    }
+}
 
 #[inline(always)]
 fn unpack_bits<F>(input: &[u8], output: &mut [u8], channels: usize, bit_depth: u8, func: F)