From c789fa46e68f69b7560f04873d76fce72edf33bc Mon Sep 17 00:00:00 2001 From: awxkee Date: Tue, 15 Oct 2024 09:13:45 +0100 Subject: [PATCH] Lalpha beta bugfix, codegen --- src/app/Cargo.toml | 2 +- src/image_to_lalphabeta.rs | 75 +++++++++++++------------- src/image_to_sigmoidal.rs | 1 - src/lalphabeta_to_image.rs | 107 ++++++++++++++++++------------------- src/planar_to_linear.rs | 27 +++++----- src/sigmoidal_to_image.rs | 1 - 6 files changed, 101 insertions(+), 112 deletions(-) diff --git a/src/app/Cargo.toml b/src/app/Cargo.toml index 3d3f908..1858ad5 100644 --- a/src/app/Cargo.toml +++ b/src/app/Cargo.toml @@ -4,5 +4,5 @@ version = "0.1.0" edition = "2021" [dependencies] -colorutils-rs = { path = "../../", features = ["rayon"] } +colorutils-rs = { path = "../../", default-features = true } image = "0.25.1" \ No newline at end of file diff --git a/src/image_to_lalphabeta.rs b/src/image_to_lalphabeta.rs index a0dcc3c..2036f21 100644 --- a/src/image_to_lalphabeta.rs +++ b/src/image_to_lalphabeta.rs @@ -53,52 +53,49 @@ fn channels_to_lalphabeta( .zip(src.chunks_exact(src_stride as usize)); } - #[cfg(feature = "rayon")] - { - iter.for_each(|(dst, src)| unsafe { - let mut _cx = 0usize; - - let mut linearized_row = vec![0f32; width as usize * channels]; - for (linear_chunk, src_chunk) in linearized_row - .chunks_exact_mut(channels) - .zip(src.chunks_exact(channels)) - { - linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table - .get_unchecked(src_chunk[image_configuration.get_r_channel_offset()] as usize); - linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table - .get_unchecked(src_chunk[image_configuration.get_g_channel_offset()] as usize); - linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table - .get_unchecked(src_chunk[image_configuration.get_b_channel_offset()] as usize); - if image_configuration.has_alpha() { - linear_chunk[image_configuration.get_a_channel_offset()] = - src_chunk[image_configuration.get_a_channel_offset()] as f32 * (1. / 255.0); - } + iter.for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let mut linearized_row = vec![0f32; width as usize * channels]; + for (linear_chunk, src_chunk) in linearized_row + .chunks_exact_mut(channels) + .zip(src.chunks_exact(channels)) + { + linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table + .get_unchecked(src_chunk[image_configuration.get_r_channel_offset()] as usize); + linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table + .get_unchecked(src_chunk[image_configuration.get_g_channel_offset()] as usize); + linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table + .get_unchecked(src_chunk[image_configuration.get_b_channel_offset()] as usize); + if image_configuration.has_alpha() { + linear_chunk[image_configuration.get_a_channel_offset()] = + src_chunk[image_configuration.get_a_channel_offset()] as f32 * (1. / 255.0); } + } - let dst_ptr = dst.as_mut_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr() as *mut f32; - for x in _cx..width as usize { - let px = x * channels; + for x in _cx..width as usize { + let px = x * channels; - let src = linearized_row.get_unchecked(px..); - let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); - let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); - let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); + let src = linearized_row.get_unchecked(px..); + let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); - let rgb = Rgb::::new(r, g, b); - let dst_store = dst_ptr.add(px); - let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65); - dst_store.write_unaligned(lalphabeta.l); - dst_store.add(1).write_unaligned(lalphabeta.alpha); - dst_store.add(2).write_unaligned(lalphabeta.beta); + let rgb = Rgb::::new(r, g, b); + let dst_store = dst_ptr.add(px); + let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65); + dst_store.write_unaligned(lalphabeta.l); + dst_store.add(1).write_unaligned(lalphabeta.alpha); + dst_store.add(2).write_unaligned(lalphabeta.beta); - if image_configuration.has_alpha() { - let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); - dst_store.add(3).write_unaligned(a); - } + if image_configuration.has_alpha() { + let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); + dst_store.add(3).write_unaligned(a); } - }); - } + } + }); } /// This function converts RGB to *lαβ* against D65 white point. This is much more effective than naive direct transformation diff --git a/src/image_to_sigmoidal.rs b/src/image_to_sigmoidal.rs index fc28c07..a0ce064 100644 --- a/src/image_to_sigmoidal.rs +++ b/src/image_to_sigmoidal.rs @@ -18,7 +18,6 @@ use crate::Rgb; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[allow(clippy::type_complexity)] diff --git a/src/lalphabeta_to_image.rs b/src/lalphabeta_to_image.rs index b9dc538..85372d4 100644 --- a/src/lalphabeta_to_image.rs +++ b/src/lalphabeta_to_image.rs @@ -53,63 +53,60 @@ fn lalphabeta_to_image( .zip(src_slice_safe_align.chunks_exact(src_stride as usize)); } - #[cfg(feature = "rayon")] - { - iter.for_each(|(dst, src)| unsafe { - let mut _cx = 0usize; - - let src_ptr = src.as_ptr() as *mut f32; - - let mut transient_row = vec![0f32; width as usize * channels]; - - for x in _cx..width as usize { - let px = x * channels; - let l_x = src_ptr.add(px).read_unaligned(); - let l_y = src_ptr.add(px + 1).read_unaligned(); - let l_z = src_ptr.add(px + 2).read_unaligned(); - let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); - let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65); - - let dst = transient_row.get_unchecked_mut((x * channels)..); - *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; - *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; - *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; - if image_configuration.has_alpha() { - let l_a = src_ptr.add(px + 3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32).round(); - *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = a_value; - } + iter.for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr() as *mut f32; + + let mut transient_row = vec![0f32; width as usize * channels]; + + for x in _cx..width as usize { + let px = x * channels; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); + let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65); + + let dst = transient_row.get_unchecked_mut((x * channels)..); + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; + if image_configuration.has_alpha() { + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32).round(); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = a_value; } - - for (dst, src) in dst - .chunks_exact_mut(channels) - .zip(transient_row.chunks_exact(channels)) - { - let r = src[image_configuration.get_r_channel_offset()]; - let g = src[image_configuration.get_g_channel_offset()]; - let b = src[image_configuration.get_b_channel_offset()]; - - let rgb = (Rgb::::new( - r.min(1f32).max(0f32), - g.min(1f32).max(0f32), - b.min(1f32).max(0f32), - ) * Rgb::::dup(2048f32)) - .round() - .cast::(); - - *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = - *lut_table.get_unchecked(rgb.r.min(2048) as usize); - *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = - *lut_table.get_unchecked(rgb.g.min(2048) as usize); - *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = - *lut_table.get_unchecked(rgb.b.min(2048) as usize); - if image_configuration.has_alpha() { - *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = - *src.get_unchecked(image_configuration.get_a_channel_offset()) as u8; - } + } + + for (dst, src) in dst + .chunks_exact_mut(channels) + .zip(transient_row.chunks_exact(channels)) + { + let r = src[image_configuration.get_r_channel_offset()]; + let g = src[image_configuration.get_g_channel_offset()]; + let b = src[image_configuration.get_b_channel_offset()]; + + let rgb = (Rgb::::new( + r.min(1f32).max(0f32), + g.min(1f32).max(0f32), + b.min(1f32).max(0f32), + ) * Rgb::::dup(2048f32)) + .round() + .cast::(); + + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r.min(2048) as usize); + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g.min(2048) as usize); + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b.min(2048) as usize); + if image_configuration.has_alpha() { + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = + *src.get_unchecked(image_configuration.get_a_channel_offset()) as u8; } - }); - } + } + }); } /// This function converts *lαβ* with interleaved alpha channel to RGBA. This is much more effective than naive direct transformation diff --git a/src/planar_to_linear.rs b/src/planar_to_linear.rs index 3fd79ed..fc69342 100644 --- a/src/planar_to_linear.rs +++ b/src/planar_to_linear.rs @@ -49,24 +49,21 @@ fn channels_to_linear( .zip(src.chunks_exact(src_stride as usize)); } - dst_slice_safe_align - .par_chunks_exact_mut(dst_stride as usize) - .zip(src.par_chunks_exact(src_stride as usize)) - .for_each(|(dst, src)| unsafe { - let mut _cx = 0usize; + iter.for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; - let src_ptr = src.as_ptr(); - let dst_ptr = dst.as_mut_ptr() as *mut f32; + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; - for x in _cx..width as usize { - let px = x; - let dst = dst_ptr.add(px); - let src = src_ptr.add(px); - let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize); + for x in _cx..width as usize { + let px = x; + let dst = dst_ptr.add(px); + let src = src_ptr.add(px); + let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize); - dst.write_unaligned(transferred); - } - }); + dst.write_unaligned(transferred); + } + }); } /// This function converts Plane to Linear. This is much more effective than naive direct transformation diff --git a/src/sigmoidal_to_image.rs b/src/sigmoidal_to_image.rs index 7033feb..04891bd 100644 --- a/src/sigmoidal_to_image.rs +++ b/src/sigmoidal_to_image.rs @@ -17,7 +17,6 @@ use crate::{Rgb, Sigmoidal}; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[allow(clippy::type_complexity)]