Skip to content

Commit

Permalink
Reduce copying and allocations
Browse files Browse the repository at this point in the history
This eliminates Reader::prev and adds special handling of unfiltering
for the first row.
  • Loading branch information
fintelia committed Nov 3, 2023
1 parent 1825c7e commit cb7ed66
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 28 deletions.
54 changes: 27 additions & 27 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,9 @@ impl<R: Read> Decoder<R> {
subframe: SubframeInfo::not_yet_init(),
fctl_read: 0,
next_frame: SubframeIdx::Initial,
prev: Vec::new(),
current: Vec::new(),
scan_start: 0,
data_stream: Vec::new(),
prev_start: 0,
current_start: 0,
transform: self.transform,
scratch_buffer: Vec::new(),
limits: self.limits,
Expand Down Expand Up @@ -347,12 +347,12 @@ pub struct Reader<R: Read> {
/// control chunk. The IDAT image _may_ have such a chunk applying to it.
fctl_read: u32,
next_frame: SubframeIdx,
/// Previous raw line
prev: Vec<u8>,
/// Current raw line
current: Vec<u8>,
/// Start index of the current scan line.
scan_start: usize,
/// Vec containing the uncompressed image data currently being processed.
data_stream: Vec<u8>,
/// Index in `data_stream` where the previous row starts.
prev_start: usize,
/// Index in `data_stream` where the current row starts.
current_start: usize,
/// Output transformations
transform: Transformations,
/// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
Expand Down Expand Up @@ -444,8 +444,7 @@ impl<R: Read> Reader<R> {
return Err(DecodingError::LimitsExceeded);
}

self.prev.clear();
self.prev.resize(self.subframe.rowlen, 0);
self.prev_start = self.current_start;

Ok(())
}
Expand Down Expand Up @@ -504,8 +503,9 @@ impl<R: Read> Reader<R> {
line_size: self.output_line_size(self.subframe.width),
};

self.current.clear();
self.scan_start = 0;
self.data_stream.clear();
self.current_start = 0;
self.prev_start = 0;
let width = self.info().width;
if self.info().interlaced {
while let Some(InterlacedRow {
Expand Down Expand Up @@ -597,7 +597,8 @@ impl<R: Read> Reader<R> {
output_buffer: &mut [u8],
) -> Result<(), DecodingError> {
self.next_raw_interlaced_row(rowlen)?;
let row = &self.prev[1..rowlen];
assert_eq!(self.current_start - self.prev_start, rowlen - 1);
let row = &self.data_stream[self.prev_start..self.current_start];

// Apply transformations and write resulting data to buffer.
let (color_type, bit_depth, trns) = {
Expand Down Expand Up @@ -706,8 +707,7 @@ impl<R: Read> Reader<R> {
let (pass, line, width) = adam7.next()?;
let rowlen = self.info().raw_row_length_from_width(width);
if last_pass != pass {
self.prev.clear();
self.prev.resize(rowlen, 0u8);
self.prev_start = self.current_start;
}
Some((rowlen, InterlaceInfo::Adam7 { pass, line, width }))
}
Expand All @@ -723,27 +723,28 @@ impl<R: Read> Reader<R> {
/// The scanline is filtered against the previous scanline according to the specification.
fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> {
// Read image data until we have at least one full row (but possibly more than one).
while self.current.len() - self.scan_start < rowlen {
while self.data_stream.len() - self.current_start < rowlen {
if self.subframe.consumed_and_flushed {
return Err(DecodingError::Format(
FormatErrorInner::NoMoreImageData.into(),
));
}

// Clear the current buffer before appending more data.
if self.scan_start > 0 {
self.current.drain(..self.scan_start).for_each(drop);
self.scan_start = 0;
if self.prev_start > 0 {
self.data_stream.drain(..self.prev_start).for_each(drop);
self.current_start -= self.prev_start;
self.prev_start = 0;
}

match self.decoder.decode_next(&mut self.current)? {
match self.decoder.decode_next(&mut self.data_stream)? {
Some(Decoded::ImageData) => {}
Some(Decoded::ImageDataFlushed) => {
self.subframe.consumed_and_flushed = true;
}
None => {
return Err(DecodingError::Format(
if self.current.is_empty() {
if self.data_stream.is_empty() {
FormatErrorInner::NoMoreImageData
} else {
FormatErrorInner::UnexpectedEndOfChunk
Expand All @@ -756,17 +757,16 @@ impl<R: Read> Reader<R> {
}

// Get a reference to the current row; the row-start index is advanced to the next row afterwards.
let row = &mut self.current[self.scan_start..];
self.scan_start += rowlen;
let (prev, row) = self.data_stream.split_at_mut(self.current_start);

// Unfilter the row.
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;
unfilter(filter, self.bpp, &self.prev[1..rowlen], &mut row[1..rowlen]);
unfilter(filter, self.bpp, &prev[self.prev_start..], &mut row[1..rowlen]);

// Save the current row for the next pass.
self.prev[..rowlen].copy_from_slice(&row[..rowlen]);
self.prev_start = self.current_start + 1;
self.current_start += rowlen;

Ok(())
}
Expand Down
87 changes: 86 additions & 1 deletion src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,22 @@ fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
}

pub(crate) fn unfilter(
filter: FilterType,
mut filter: FilterType,
tbpp: BytesPerPixel,
previous: &[u8],
current: &mut [u8],
) {
use self::FilterType::*;

// If the previous row is empty, then treat it as if it were filled with zeros.
if previous.is_empty() {
if filter == Paeth {
filter = Sub;
} else if filter == Up {
filter = NoFilter;
}
}

// [2023/01 @okaneco] - Notes on optimizing decoding filters
//
// Links:
Expand Down Expand Up @@ -452,6 +461,82 @@ pub(crate) fn unfilter(
*curr = curr.wrapping_add(above);
}
}
Avg if previous.is_empty() => match tbpp {
BytesPerPixel::One => {
current.iter_mut().reduce(|&mut prev, curr| {
*curr = curr.wrapping_add(prev / 2);
curr
});
}
BytesPerPixel::Two => {
let mut prev = [0; 2];
for chunk in current.chunks_exact_mut(2) {
let new_chunk = [
chunk[0].wrapping_add(prev[0] / 2),
chunk[1].wrapping_add(prev[1] / 2),
];
*TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
prev = new_chunk;
}
}
BytesPerPixel::Three => {
let mut prev = [0; 3];
for chunk in current.chunks_exact_mut(3) {
let new_chunk = [
chunk[0].wrapping_add(prev[0] / 2),
chunk[1].wrapping_add(prev[1] / 2),
chunk[2].wrapping_add(prev[2] / 2),
];
*TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk;
prev = new_chunk;
}
}
BytesPerPixel::Four => {
let mut prev = [0; 4];
for chunk in current.chunks_exact_mut(4) {
let new_chunk = [
chunk[0].wrapping_add(prev[0] / 2),
chunk[1].wrapping_add(prev[1] / 2),
chunk[2].wrapping_add(prev[2] / 2),
chunk[3].wrapping_add(prev[3] / 2),
];
*TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
prev = new_chunk;
}
}
BytesPerPixel::Six => {
let mut prev = [0; 6];
for chunk in current.chunks_exact_mut(6) {
let new_chunk = [
chunk[0].wrapping_add(prev[0] / 2),
chunk[1].wrapping_add(prev[1] / 2),
chunk[2].wrapping_add(prev[2] / 2),
chunk[3].wrapping_add(prev[3] / 2),
chunk[4].wrapping_add(prev[4] / 2),
chunk[5].wrapping_add(prev[5] / 2),
];
*TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
prev = new_chunk;
}
}
BytesPerPixel::Eight => {
let mut prev = [0; 8];
for chunk in current.chunks_exact_mut(8) {
let new_chunk = [
chunk[0].wrapping_add(prev[0] / 2),
chunk[1].wrapping_add(prev[1] / 2),
chunk[2].wrapping_add(prev[2] / 2),
chunk[3].wrapping_add(prev[3] / 2),
chunk[4].wrapping_add(prev[4] / 2),
chunk[5].wrapping_add(prev[5] / 2),
chunk[6].wrapping_add(prev[6] / 2),
chunk[7].wrapping_add(prev[7] / 2),
];
*TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
prev = new_chunk;
}
}
},
Avg => match tbpp {
BytesPerPixel::One => {
let mut lprev = [0; 1];
Expand Down

0 comments on commit cb7ed66

Please sign in to comment.