From c789fa46e68f69b7560f04873d76fce72edf33bc Mon Sep 17 00:00:00 2001
From: awxkee <radzivon.bartoshyk@proton.me>
Date: Tue, 15 Oct 2024 09:13:45 +0100
Subject: [PATCH] Lalpha beta bugfix, codegen

---
 src/app/Cargo.toml         |   2 +-
 src/image_to_lalphabeta.rs |  75 +++++++++++++-------------
 src/image_to_sigmoidal.rs  |   1 -
 src/lalphabeta_to_image.rs | 107 ++++++++++++++++++-------------------
 src/planar_to_linear.rs    |  27 +++++-----
 src/sigmoidal_to_image.rs  |   1 -
 6 files changed, 101 insertions(+), 112 deletions(-)
diff --git a/src/app/Cargo.toml b/src/app/Cargo.toml
index 3d3f908..1858ad5 100644
--- a/src/app/Cargo.toml
+++ b/src/app/Cargo.toml
@@ -4,5 +4,5 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-colorutils-rs = { path = "../../", features = ["rayon"] }
+colorutils-rs = { path = "../../", default-features = true }
 image = "0.25.1"
\ No newline at end of file
diff --git a/src/image_to_lalphabeta.rs b/src/image_to_lalphabeta.rs
index a0dcc3c..2036f21 100644
--- a/src/image_to_lalphabeta.rs
+++ b/src/image_to_lalphabeta.rs
@@ -53,52 +53,49 @@ fn channels_to_lalphabeta<const CHANNELS_CONFIGURATION: u8>(
             .zip(src.chunks_exact(src_stride as usize));
     }
 
-    #[cfg(feature = "rayon")]
-    {
-        iter.for_each(|(dst, src)| unsafe {
-            let mut _cx = 0usize;
-
-            let mut linearized_row = vec![0f32; width as usize * channels];
-            for (linear_chunk, src_chunk) in linearized_row
-                .chunks_exact_mut(channels)
-                .zip(src.chunks_exact(channels))
-            {
-                linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table
-                    .get_unchecked(src_chunk[image_configuration.get_r_channel_offset()] as usize);
-                linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table
-                    .get_unchecked(src_chunk[image_configuration.get_g_channel_offset()] as usize);
-                linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table
-                    .get_unchecked(src_chunk[image_configuration.get_b_channel_offset()] as usize);
-                if image_configuration.has_alpha() {
-                    linear_chunk[image_configuration.get_a_channel_offset()] =
-                        src_chunk[image_configuration.get_a_channel_offset()] as f32 * (1. / 255.0);
-                }
+    iter.for_each(|(dst, src)| unsafe {
+        let mut _cx = 0usize;
+
+        let mut linearized_row = vec![0f32; width as usize * channels];
+        for (linear_chunk, src_chunk) in linearized_row
+            .chunks_exact_mut(channels)
+            .zip(src.chunks_exact(channels))
+        {
+            linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table
+                .get_unchecked(src_chunk[image_configuration.get_r_channel_offset()] as usize);
+            linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table
+                .get_unchecked(src_chunk[image_configuration.get_g_channel_offset()] as usize);
+            linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table
+                .get_unchecked(src_chunk[image_configuration.get_b_channel_offset()] as usize);
+            if image_configuration.has_alpha() {
+                linear_chunk[image_configuration.get_a_channel_offset()] =
+                    src_chunk[image_configuration.get_a_channel_offset()] as f32 * (1. / 255.0);
             }
+        }
 
-            let dst_ptr = dst.as_mut_ptr() as *mut f32;
+        let dst_ptr = dst.as_mut_ptr() as *mut f32;
 
-            for x in _cx..width as usize {
-                let px = x * channels;
+        for x in _cx..width as usize {
+            let px = x * channels;
 
-                let src = linearized_row.get_unchecked(px..);
-                let r = *src.get_unchecked(image_configuration.get_r_channel_offset());
-                let g = *src.get_unchecked(image_configuration.get_g_channel_offset());
-                let b = *src.get_unchecked(image_configuration.get_b_channel_offset());
+            let src = linearized_row.get_unchecked(px..);
+            let r = *src.get_unchecked(image_configuration.get_r_channel_offset());
+            let g = *src.get_unchecked(image_configuration.get_g_channel_offset());
+            let b = *src.get_unchecked(image_configuration.get_b_channel_offset());
 
-                let rgb = Rgb::<f32>::new(r, g, b);
-                let dst_store = dst_ptr.add(px);
-                let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65);
-                dst_store.write_unaligned(lalphabeta.l);
-                dst_store.add(1).write_unaligned(lalphabeta.alpha);
-                dst_store.add(2).write_unaligned(lalphabeta.beta);
+            let rgb = Rgb::<f32>::new(r, g, b);
+            let dst_store = dst_ptr.add(px);
+            let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65);
+            dst_store.write_unaligned(lalphabeta.l);
+            dst_store.add(1).write_unaligned(lalphabeta.alpha);
+            dst_store.add(2).write_unaligned(lalphabeta.beta);
 
-                if image_configuration.has_alpha() {
-                    let a = *src.get_unchecked(image_configuration.get_a_channel_offset());
-                    dst_store.add(3).write_unaligned(a);
-                }
+            if image_configuration.has_alpha() {
+                let a = *src.get_unchecked(image_configuration.get_a_channel_offset());
+                dst_store.add(3).write_unaligned(a);
             }
-        });
-    }
+        }
+    });
 }
 
 /// This function converts RGB to *lαβ* against D65 white point. This is much more effective than naive direct transformation
diff --git a/src/image_to_sigmoidal.rs b/src/image_to_sigmoidal.rs
index fc28c07..a0ce064 100644
--- a/src/image_to_sigmoidal.rs
+++ b/src/image_to_sigmoidal.rs
@@ -18,7 +18,6 @@ use crate::Rgb;
 use rayon::iter::{IndexedParallelIterator, ParallelIterator};
 #[cfg(feature = "rayon")]
 use rayon::prelude::{ParallelSlice, ParallelSliceMut};
-#[cfg(feature = "rayon")]
 use std::slice;
 
 #[allow(clippy::type_complexity)]
diff --git a/src/lalphabeta_to_image.rs b/src/lalphabeta_to_image.rs
index b9dc538..85372d4 100644
--- a/src/lalphabeta_to_image.rs
+++ b/src/lalphabeta_to_image.rs
@@ -53,63 +53,60 @@ fn lalphabeta_to_image<const CHANNELS_CONFIGURATION: u8>(
             .zip(src_slice_safe_align.chunks_exact(src_stride as usize));
     }
 
-    #[cfg(feature = "rayon")]
-    {
-        iter.for_each(|(dst, src)| unsafe {
-            let mut _cx = 0usize;
-
-            let src_ptr = src.as_ptr() as *mut f32;
-
-            let mut transient_row = vec![0f32; width as usize * channels];
-
-            for x in _cx..width as usize {
-                let px = x * channels;
-                let l_x = src_ptr.add(px).read_unaligned();
-                let l_y = src_ptr.add(px + 1).read_unaligned();
-                let l_z = src_ptr.add(px + 2).read_unaligned();
-                let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z);
-                let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65);
-
-                let dst = transient_row.get_unchecked_mut((x * channels)..);
-                *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r;
-                *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g;
-                *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b;
-                if image_configuration.has_alpha() {
-                    let l_a = src_ptr.add(px + 3).read_unaligned();
-                    let a_value = (l_a * 255f32).max(0f32).round();
-                    *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = a_value;
-                }
+    iter.for_each(|(dst, src)| unsafe {
+        let mut _cx = 0usize;
+
+        let src_ptr = src.as_ptr() as *mut f32;
+
+        let mut transient_row = vec![0f32; width as usize * channels];
+
+        for x in _cx..width as usize {
+            let px = x * channels;
+            let l_x = src_ptr.add(px).read_unaligned();
+            let l_y = src_ptr.add(px + 1).read_unaligned();
+            let l_z = src_ptr.add(px + 2).read_unaligned();
+            let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z);
+            let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65);
+
+            let dst = transient_row.get_unchecked_mut((x * channels)..);
+            *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r;
+            *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g;
+            *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b;
+            if image_configuration.has_alpha() {
+                let l_a = src_ptr.add(px + 3).read_unaligned();
+                let a_value = (l_a * 255f32).max(0f32).round();
+                *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = a_value;
             }
-
-            for (dst, src) in dst
-                .chunks_exact_mut(channels)
-                .zip(transient_row.chunks_exact(channels))
-            {
-                let r = src[image_configuration.get_r_channel_offset()];
-                let g = src[image_configuration.get_g_channel_offset()];
-                let b = src[image_configuration.get_b_channel_offset()];
-
-                let rgb = (Rgb::<f32>::new(
-                    r.min(1f32).max(0f32),
-                    g.min(1f32).max(0f32),
-                    b.min(1f32).max(0f32),
-                ) * Rgb::<f32>::dup(2048f32))
-                .round()
-                .cast::<u16>();
-
-                *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) =
-                    *lut_table.get_unchecked(rgb.r.min(2048) as usize);
-                *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) =
-                    *lut_table.get_unchecked(rgb.g.min(2048) as usize);
-                *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) =
-                    *lut_table.get_unchecked(rgb.b.min(2048) as usize);
-                if image_configuration.has_alpha() {
-                    *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) =
-                        *src.get_unchecked(image_configuration.get_a_channel_offset()) as u8;
-                }
+        }
+
+        for (dst, src) in dst
+            .chunks_exact_mut(channels)
+            .zip(transient_row.chunks_exact(channels))
+        {
+            let r = src[image_configuration.get_r_channel_offset()];
+            let g = src[image_configuration.get_g_channel_offset()];
+            let b = src[image_configuration.get_b_channel_offset()];
+
+            let rgb = (Rgb::<f32>::new(
+                r.min(1f32).max(0f32),
+                g.min(1f32).max(0f32),
+                b.min(1f32).max(0f32),
+            ) * Rgb::<f32>::dup(2048f32))
+            .round()
+            .cast::<u16>();
+
+            *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) =
+                *lut_table.get_unchecked(rgb.r.min(2048) as usize);
+            *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) =
+                *lut_table.get_unchecked(rgb.g.min(2048) as usize);
+            *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) =
+                *lut_table.get_unchecked(rgb.b.min(2048) as usize);
+            if image_configuration.has_alpha() {
+                *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) =
+                    *src.get_unchecked(image_configuration.get_a_channel_offset()) as u8;
             }
-        });
-    }
+        }
+    });
 }
 
 /// This function converts *lαβ* with interleaved alpha channel to RGBA. This is much more effective than naive direct transformation
diff --git a/src/planar_to_linear.rs b/src/planar_to_linear.rs
index 3fd79ed..fc69342 100644
--- a/src/planar_to_linear.rs
+++ b/src/planar_to_linear.rs
@@ -49,24 +49,21 @@ fn channels_to_linear(
             .zip(src.chunks_exact(src_stride as usize));
     }
 
-    dst_slice_safe_align
-        .par_chunks_exact_mut(dst_stride as usize)
-        .zip(src.par_chunks_exact(src_stride as usize))
-        .for_each(|(dst, src)| unsafe {
-            let mut _cx = 0usize;
+    iter.for_each(|(dst, src)| unsafe {
+        let mut _cx = 0usize;
 
-            let src_ptr = src.as_ptr();
-            let dst_ptr = dst.as_mut_ptr() as *mut f32;
+        let src_ptr = src.as_ptr();
+        let dst_ptr = dst.as_mut_ptr() as *mut f32;
 
-            for x in _cx..width as usize {
-                let px = x;
-                let dst = dst_ptr.add(px);
-                let src = src_ptr.add(px);
-                let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize);
+        for x in _cx..width as usize {
+            let px = x;
+            let dst = dst_ptr.add(px);
+            let src = src_ptr.add(px);
+            let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize);
 
-                dst.write_unaligned(transferred);
-            }
-        });
+            dst.write_unaligned(transferred);
+        }
+    });
 }
 
 /// This function converts Plane to Linear. This is much more effective than naive direct transformation
diff --git a/src/sigmoidal_to_image.rs b/src/sigmoidal_to_image.rs
index 7033feb..04891bd 100644
--- a/src/sigmoidal_to_image.rs
+++ b/src/sigmoidal_to_image.rs
@@ -17,7 +17,6 @@ use crate::{Rgb, Sigmoidal};
 use rayon::iter::{IndexedParallelIterator, ParallelIterator};
 #[cfg(feature = "rayon")]
 use rayon::prelude::{ParallelSlice, ParallelSliceMut};
-#[cfg(feature = "rayon")]
 use std::slice;
 
 #[allow(clippy::type_complexity)]