From b9037032a06d82858e4e4e94151bdb8db1f7c457 Mon Sep 17 00:00:00 2001 From: awxkee Date: Fri, 11 Oct 2024 00:15:19 +0100 Subject: [PATCH] Big reworking with speed increasing --- src/app/src/main.rs | 4 +- src/image_to_linear_u8.rs | 42 ++++++++++--------- src/lalphabeta_to_image.rs | 75 ++++++++++++++++++++++------------ src/linear_to_image.rs | 20 ++++----- src/linear_to_image_u8.rs | 83 ++++++++++++++++---------------------- src/oklab_to_image.rs | 30 ++++++++++---- 6 files changed, 141 insertions(+), 113 deletions(-) diff --git a/src/app/src/main.rs b/src/app/src/main.rs index bf7c58f..4c35da2 100644 --- a/src/app/src/main.rs +++ b/src/app/src/main.rs @@ -68,7 +68,7 @@ fn main() { lab_store.resize(width as usize * components * height as usize, 0.); let src_stride = width * components as u32; let start_time = Instant::now(); - bgr_to_lalphabeta( + bgr_to_oklab( src_bytes, src_stride, &mut lab_store, @@ -103,7 +103,7 @@ fn main() { // } let start_time = Instant::now(); - lalphabeta_to_bgr( + oklab_to_bgr( &lab_store, store_stride as u32, &mut dst_slice, diff --git a/src/image_to_linear_u8.rs b/src/image_to_linear_u8.rs index 4db8e5a..f8e50ad 100644 --- a/src/image_to_linear_u8.rs +++ b/src/image_to_linear_u8.rs @@ -41,26 +41,28 @@ fn channels_to_linear( .chunks_exact_mut(dst_stride as usize) .zip(l_src.chunks_exact(src_stride as usize)) { - let mut _cx = 0usize; - - for x in _cx..width as usize { - let px = x * channels; - let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()); - let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()); - let b = *src_row.get_unchecked(px + image_configuration.get_b_channel_offset()); - - let rgb = Rgb::::new(r, g, b); - - *dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) = - *lut_table.get_unchecked(rgb.r as usize); - *dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) = - *lut_table.get_unchecked(rgb.g as usize); - *dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) = - *lut_table.get_unchecked(rgb.b as usize); - - if USE_ALPHA && image_configuration.has_alpha() { - let a = *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()); - *dst_row.get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a; + unsafe { + let mut _cx = 0usize; + + for x in _cx..width as usize { + let px = x * channels; + let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()); + let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()); + let b = *src_row.get_unchecked(px + image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); + + *dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b as usize); + + if USE_ALPHA && image_configuration.has_alpha() { + let a = *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()); + *dst_row.get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a; + } } } } diff --git a/src/lalphabeta_to_image.rs b/src/lalphabeta_to_image.rs index 9cf6656..d1f7aec 100644 --- a/src/lalphabeta_to_image.rs +++ b/src/lalphabeta_to_image.rs @@ -102,42 +102,65 @@ fn lalphabeta_to_image( #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; + for (dst, src) in dst + .chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + let src_ptr = src.as_ptr() as *mut f32; - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let mut transient_row = vec![0f32; width as usize * channels]; - for x in _cx..width as usize { - let px = x * channels; - let l_x = unsafe { src_ptr.add(px).read_unaligned() }; - let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; - let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; - let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); - let rgb = lalphabeta.to_rgb(transfer_function); + for x in _cx..width as usize { + let px = x * channels; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); + let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65); - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); + let dst = transient_row.get_unchecked_mut((x * channels)..); + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; if image_configuration.has_alpha() { let l_a = src_ptr.add(px + 3).read_unaligned(); let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = + a_value; } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + for (dst, src) in dst + .chunks_exact_mut(channels) + .zip(transient_row.chunks_exact(channels)) + { + let r = src[image_configuration.get_r_channel_offset()]; + let g = src[image_configuration.get_g_channel_offset()]; + let b = src[image_configuration.get_b_channel_offset()]; + + let rgb = (Rgb::::new( + r.min(1f32).max(0f32), + g.min(1f32).max(0f32), + b.min(1f32).max(0f32), + ) * Rgb::::dup(2048f32)) + .round() + .cast::(); + + dst[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(rgb.r.min(2048) as usize); + dst[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(rgb.g.min(2048) as usize); + dst[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(rgb.b.min(2048) as usize); + if image_configuration.has_alpha() { + dst[image_configuration.get_a_channel_offset()] = + src[image_configuration.get_a_channel_offset()] as u8; + } + } + } } } } diff --git a/src/linear_to_image.rs b/src/linear_to_image.rs index 0a76bba..55929a4 100644 --- a/src/linear_to_image.rs +++ b/src/linear_to_image.rs @@ -33,7 +33,7 @@ fn linear_to_gamma_channels::new(r, g, b); - *dst.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize); - *dst.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize); - *dst.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize); + let dst = dst.get_unchecked_mut(px..); + + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r as usize); + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g as usize); + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b as usize); if USE_ALPHA && image_configuration.has_alpha() { let a = src.get_unchecked(px + image_configuration.get_a_channel_offset()); - *dst.get_unchecked_mut(px + 3) = *a; + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = *a; } } }); @@ -68,54 +73,36 @@ fn linear_to_gamma_channels::new(r, g, b); - let mut rgb = rgb.to_rgb_f32(); - - rgb = rgb.gamma(transfer_function); - - unsafe { - *dst_slice.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize); - *dst_slice.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize); - *dst_slice.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize); - } + for (dst, src) in dst + .chunks_exact_mut(dst_stride as usize) + .zip(src.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; + + for x in _cx..width as usize { + let px = x * channels; + let r = *src.get_unchecked(px + image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(px + image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(px + image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); + + let dst = dst.get_unchecked_mut(px..); - if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_a_channel_offset()) - }; - unsafe { - *dst_slice.get_unchecked_mut(px + 3) = a; + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r as usize); + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g as usize); + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b as usize); + + if USE_ALPHA && image_configuration.has_alpha() { + let a = src.get_unchecked(px + image_configuration.get_a_channel_offset()); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = *a; } } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } } diff --git a/src/oklab_to_image.rs b/src/oklab_to_image.rs index 30e1210..3cf96be 100644 --- a/src/oklab_to_image.rs +++ b/src/oklab_to_image.rs @@ -115,10 +115,17 @@ fn oklab_to_image( .zip(transient_row.chunks_exact_mut(channels)) { let rgb = (Rgb::::new( - src_chunks[image_configuration.get_r_channel_offset()], - src_chunks[image_configuration.get_g_channel_offset()], - src_chunks[image_configuration.get_b_channel_offset()], + src_chunks[image_configuration.get_r_channel_offset()] + .max(0.) + .min(1.), + src_chunks[image_configuration.get_g_channel_offset()] + .max(0.) + .min(1.), + src_chunks[image_configuration.get_b_channel_offset()] + .max(0.) + .min(1.), ) * Rgb::::dup(2048f32)) + .round() .cast::(); dst_chunks[image_configuration.get_r_channel_offset()] = @@ -183,11 +190,18 @@ fn oklab_to_image( .chunks_exact_mut(channels) .zip(transient_row.chunks_exact_mut(channels)) { - let rgb = (Rgb::::new( - src_chunks[image_configuration.get_r_channel_offset()], - src_chunks[image_configuration.get_g_channel_offset()], - src_chunks[image_configuration.get_b_channel_offset()], - ) * Rgb::::dup(2048f32)) + let rgb = (Rgb::new( + src_chunks[image_configuration.get_r_channel_offset()] + .max(0.) + .min(1.), + src_chunks[image_configuration.get_g_channel_offset()] + .max(0.) + .min(1.), + src_chunks[image_configuration.get_b_channel_offset()] + .max(0.) + .min(1.), + ) * Rgb::dup(2048f32)) + .round() .cast::(); dst_chunks[image_configuration.get_r_channel_offset()] =