-
Notifications
You must be signed in to change notification settings - Fork 143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce copying and allocations #422
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -210,9 +210,9 @@ impl<R: Read> Decoder<R> { | |
subframe: SubframeInfo::not_yet_init(), | ||
fctl_read: 0, | ||
next_frame: SubframeIdx::Initial, | ||
prev: Vec::new(), | ||
current: Vec::new(), | ||
scan_start: 0, | ||
data_stream: Vec::new(), | ||
prev_start: 0, | ||
current_start: 0, | ||
transform: self.transform, | ||
scratch_buffer: Vec::new(), | ||
limits: self.limits, | ||
|
@@ -347,12 +347,12 @@ pub struct Reader<R: Read> { | |
/// control chunk. The IDAT image _may_ have such a chunk applying to it. | ||
fctl_read: u32, | ||
next_frame: SubframeIdx, | ||
/// Previous raw line | ||
prev: Vec<u8>, | ||
/// Current raw line | ||
current: Vec<u8>, | ||
/// Start index of the current scan line. | ||
scan_start: usize, | ||
/// Vec containing the uncompressed image data currently being processed. | ||
data_stream: Vec<u8>, | ||
/// Index in `data_stream` where the previous row starts. | ||
prev_start: usize, | ||
/// Index in `data_stream` where the current row starts. | ||
current_start: usize, | ||
/// Output transformations | ||
transform: Transformations, | ||
/// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference | ||
|
@@ -444,8 +444,7 @@ impl<R: Read> Reader<R> { | |
return Err(DecodingError::LimitsExceeded); | ||
} | ||
|
||
self.prev.clear(); | ||
self.prev.resize(self.subframe.rowlen, 0); | ||
self.prev_start = self.current_start; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. optional, feel free to ignore: There is minimal code duplication here (e.g. new line 710 looks the same) + I wonder if the code would be more readable if this single line was hoisted into a well-named separate function (maybe |
||
|
||
Ok(()) | ||
} | ||
|
@@ -504,8 +503,9 @@ impl<R: Read> Reader<R> { | |
line_size: self.output_line_size(self.subframe.width), | ||
}; | ||
|
||
self.current.clear(); | ||
self.scan_start = 0; | ||
self.data_stream.clear(); | ||
self.current_start = 0; | ||
self.prev_start = 0; | ||
let width = self.info().width; | ||
if self.info().interlaced { | ||
while let Some(InterlacedRow { | ||
|
@@ -597,7 +597,8 @@ impl<R: Read> Reader<R> { | |
output_buffer: &mut [u8], | ||
) -> Result<(), DecodingError> { | ||
self.next_raw_interlaced_row(rowlen)?; | ||
let row = &self.prev[1..rowlen]; | ||
assert_eq!(self.current_start - self.prev_start, rowlen - 1); | ||
let row = &self.data_stream[self.prev_start..self.current_start]; | ||
|
||
// Apply transformations and write resulting data to buffer. | ||
let (color_type, bit_depth, trns) = { | ||
|
@@ -706,8 +707,7 @@ impl<R: Read> Reader<R> { | |
let (pass, line, width) = adam7.next()?; | ||
let rowlen = self.info().raw_row_length_from_width(width); | ||
if last_pass != pass { | ||
self.prev.clear(); | ||
self.prev.resize(rowlen, 0u8); | ||
self.prev_start = self.current_start; | ||
} | ||
Some((rowlen, InterlaceInfo::Adam7 { pass, line, width })) | ||
} | ||
|
@@ -723,27 +723,28 @@ impl<R: Read> Reader<R> { | |
/// The scanline is filtered against the previous scanline according to the specification. | ||
fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> { | ||
// Read image data until we have at least one full row (but possibly more than one). | ||
while self.current.len() - self.scan_start < rowlen { | ||
while self.data_stream.len() - self.current_start < rowlen { | ||
if self.subframe.consumed_and_flushed { | ||
return Err(DecodingError::Format( | ||
FormatErrorInner::NoMoreImageData.into(), | ||
)); | ||
} | ||
|
||
// Clear the current buffer before appending more data. | ||
if self.scan_start > 0 { | ||
self.current.drain(..self.scan_start).for_each(drop); | ||
self.scan_start = 0; | ||
if self.prev_start > 0 { | ||
self.data_stream.drain(..self.prev_start).for_each(drop); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
I can't really read assembly well, but the OTOH, maybe this feedback doesn't belong to this PR and we should fix this in a separate PR? (e.g. |
||
self.current_start -= self.prev_start; | ||
self.prev_start = 0; | ||
} | ||
|
||
match self.decoder.decode_next(&mut self.current)? { | ||
match self.decoder.decode_next(&mut self.data_stream)? { | ||
Some(Decoded::ImageData) => {} | ||
Some(Decoded::ImageDataFlushed) => { | ||
self.subframe.consumed_and_flushed = true; | ||
} | ||
None => { | ||
return Err(DecodingError::Format( | ||
if self.current.is_empty() { | ||
if self.data_stream.is_empty() { | ||
FormatErrorInner::NoMoreImageData | ||
} else { | ||
FormatErrorInner::UnexpectedEndOfChunk | ||
|
@@ -756,17 +757,21 @@ impl<R: Read> Reader<R> { | |
} | ||
|
||
// Get a reference to the current row and point scan_start to the next one. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Probably need to remove the remaining reference to |
||
let row = &mut self.current[self.scan_start..]; | ||
self.scan_start += rowlen; | ||
let (prev, row) = self.data_stream.split_at_mut(self.current_start); | ||
|
||
// Unfilter the row. | ||
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format( | ||
FormatErrorInner::UnknownFilterMethod(row[0]).into(), | ||
))?; | ||
unfilter(filter, self.bpp, &self.prev[1..rowlen], &mut row[1..rowlen]); | ||
|
||
// Save the current row for the next pass. | ||
self.prev[..rowlen].copy_from_slice(&row[..rowlen]); | ||
unfilter( | ||
filter, | ||
self.bpp, | ||
&prev[self.prev_start..], | ||
&mut row[1..rowlen], | ||
); | ||
|
||
self.prev_start = self.current_start + 1; | ||
self.current_start += rowlen; | ||
|
||
Ok(()) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -282,13 +282,22 @@ fn filter_paeth(a: u8, b: u8, c: u8) -> u8 { | |
} | ||
|
||
pub(crate) fn unfilter( | ||
filter: FilterType, | ||
mut filter: FilterType, | ||
tbpp: BytesPerPixel, | ||
previous: &[u8], | ||
current: &mut [u8], | ||
) { | ||
use self::FilterType::*; | ||
|
||
// If the previous row is empty, then treat it as if it were filled with zeros. | ||
if previous.is_empty() { | ||
if filter == Paeth { | ||
filter = Sub; | ||
} else if filter == Up { | ||
filter = NoFilter; | ||
} | ||
} | ||
|
||
// [2023/01 @okaneco] - Notes on optimizing decoding filters | ||
// | ||
// Links: | ||
|
@@ -452,6 +461,82 @@ pub(crate) fn unfilter( | |
*curr = curr.wrapping_add(above); | ||
} | ||
} | ||
Avg if previous.is_empty() => match tbpp { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if |
||
BytesPerPixel::One => { | ||
current.iter_mut().reduce(|&mut prev, curr| { | ||
*curr = curr.wrapping_add(prev / 2); | ||
curr | ||
}); | ||
} | ||
BytesPerPixel::Two => { | ||
let mut prev = [0; 2]; | ||
for chunk in current.chunks_exact_mut(2) { | ||
let new_chunk = [ | ||
chunk[0].wrapping_add(prev[0] / 2), | ||
chunk[1].wrapping_add(prev[1] / 2), | ||
]; | ||
*TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk; | ||
prev = new_chunk; | ||
} | ||
} | ||
BytesPerPixel::Three => { | ||
let mut prev = [0; 3]; | ||
for chunk in current.chunks_exact_mut(3) { | ||
let new_chunk = [ | ||
chunk[0].wrapping_add(prev[0] / 2), | ||
chunk[1].wrapping_add(prev[1] / 2), | ||
chunk[2].wrapping_add(prev[2] / 2), | ||
]; | ||
*TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk; | ||
prev = new_chunk; | ||
} | ||
} | ||
BytesPerPixel::Four => { | ||
let mut prev = [0; 4]; | ||
for chunk in current.chunks_exact_mut(4) { | ||
let new_chunk = [ | ||
chunk[0].wrapping_add(prev[0] / 2), | ||
chunk[1].wrapping_add(prev[1] / 2), | ||
chunk[2].wrapping_add(prev[2] / 2), | ||
chunk[3].wrapping_add(prev[3] / 2), | ||
]; | ||
*TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk; | ||
prev = new_chunk; | ||
} | ||
} | ||
BytesPerPixel::Six => { | ||
let mut prev = [0; 6]; | ||
for chunk in current.chunks_exact_mut(6) { | ||
let new_chunk = [ | ||
chunk[0].wrapping_add(prev[0] / 2), | ||
chunk[1].wrapping_add(prev[1] / 2), | ||
chunk[2].wrapping_add(prev[2] / 2), | ||
chunk[3].wrapping_add(prev[3] / 2), | ||
chunk[4].wrapping_add(prev[4] / 2), | ||
chunk[5].wrapping_add(prev[5] / 2), | ||
]; | ||
*TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk; | ||
prev = new_chunk; | ||
} | ||
} | ||
BytesPerPixel::Eight => { | ||
let mut prev = [0; 8]; | ||
for chunk in current.chunks_exact_mut(8) { | ||
let new_chunk = [ | ||
chunk[0].wrapping_add(prev[0] / 2), | ||
chunk[1].wrapping_add(prev[1] / 2), | ||
chunk[2].wrapping_add(prev[2] / 2), | ||
chunk[3].wrapping_add(prev[3] / 2), | ||
chunk[4].wrapping_add(prev[4] / 2), | ||
chunk[5].wrapping_add(prev[5] / 2), | ||
chunk[6].wrapping_add(prev[6] / 2), | ||
chunk[7].wrapping_add(prev[7] / 2), | ||
]; | ||
*TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk; | ||
prev = new_chunk; | ||
} | ||
} | ||
}, | ||
Avg => match tbpp { | ||
BytesPerPixel::One => { | ||
let mut lprev = [0; 1]; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it would be useful to point out that
prev_start
points at pixel data -rowlen - 1
worth of bytes, but 2)current_start
points filter kind byte + pixel data (potentially incomplete - at mostrowlen
worth of bytes).prev_start == current_start
indicates that there is no previous row