Fixing Oklab

awxkee committed on Jul 20, 2024 · f3ec079
1 parent c7e2bf9
commit f3ec079
Show file tree

Hide file tree

Showing 12 changed files with 297 additions and 90 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }
 
 [package]
 name = "colorutils-rs"
-version = "0.4.15"
+version = "0.4.16"
 edition = "2021"
 description = "High performance utilities for color format handling and conversion."
 readme = "README.md"

diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@ Allows conversion between
 - [x] HSV
 - [x] LAB
 - [x] LUV
+- [x] LCh
 - [x] XYZ
 - [x] Sigmoidal
 - [x] Oklab

diff --git a/src/neon/image_to_oklab.rs b/src/neon/image_to_oklab.rs
@@ -7,12 +7,13 @@
 use crate::image::ImageConfiguration;
 use crate::neon::get_neon_linear_transfer;
 use crate::neon::math::vcolorq_matrix_f32;
-use crate::TransferFunction;
+use crate::{TransferFunction, SRGB_TO_XYZ_D65};
 use erydanos::vcbrtq_fast_f32;
 use std::arch::aarch64::*;
 
 macro_rules! triple_to_oklab {
     ($r: expr, $g: expr, $b: expr, $transfer: expr,
+    $x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr,
     $c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr,
         $m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr
     ) => {{
@@ -23,10 +24,13 @@ macro_rules! triple_to_oklab {
         let dl_m = $transfer(g_f);
         let dl_s = $transfer(b_f);
 
-        let (l_l, l_m, l_s) = vcolorq_matrix_f32(
-            dl_l, dl_m, dl_s, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8,
+        let (x, y, z) = vcolorq_matrix_f32(
+            dl_l, dl_m, dl_s, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8,
         );
 
+        let (l_l, l_m, l_s) =
+            vcolorq_matrix_f32(x, y, z, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8);
+
         let l_ = vcbrtq_fast_f32(l_l);
         let m_ = vcbrtq_fast_f32(l_m);
         let s_ = vcbrtq_fast_f32(l_s);
@@ -54,6 +58,19 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
 
     let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32;
 
+    // Matrix To XYZ
+    let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = (
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(2)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(0)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(1)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(2)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(0)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(1)),
+        vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(2)),
+    );
+
     let (c0, c1, c2, c3, c4, c5, c6, c7, c8) = (
         vdupq_n_f32(0.4122214708f32),
         vdupq_n_f32(0.5363325363f32),
@@ -120,8 +137,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
         let b_low_low = vmovl_u16(vget_low_u16(b_low));
 
         let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!(
-            r_low_low, g_low_low, b_low_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1,
-            m2, m3, m4, m5, m6, m7, m8
+            r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1,
+            c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
         );
 
         let a_low = vmovl_u8(vget_low_u8(a_chan));
@@ -141,8 +158,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
         let b_low_high = vmovl_high_u16(b_low);
 
         let (x_low_high, y_low_high, z_low_high) = triple_to_oklab!(
-            r_low_high, g_low_high, b_low_high, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0,
-            m1, m2, m3, m4, m5, m6, m7, m8
+            r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0,
+            c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
         );
 
         if image_configuration.has_alpha() {
@@ -163,8 +180,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
         let b_high_low = vmovl_u16(vget_low_u16(b_high));
 
         let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!(
-            r_high_low, g_high_low, b_high_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0,
-            m1, m2, m3, m4, m5, m6, m7, m8
+            r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0,
+            c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
         );
 
         let a_high = vmovl_high_u8(a_chan);
@@ -191,6 +208,15 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
             g_high_high,
             b_high_high,
             &transfer,
+            x0,
+            x1,
+            x2,
+            x3,
+            x4,
+            x5,
+            x6,
+            x7,
+            x8,
             c0,
             c1,
             c2,

diff --git a/src/neon/linear_to_image.rs b/src/neon/linear_to_image.rs
@@ -7,47 +7,19 @@
 
 use crate::image::ImageConfiguration;
 use crate::neon::*;
-use crate::TransferFunction;
+use crate::{load_f32_and_deinterleave, TransferFunction};
 use std::arch::aarch64::*;
 
 #[inline(always)]
 unsafe fn neon_gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
     src: *const f32,
     transfer_function: TransferFunction,
 ) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) {
-    let d_alpha = vdupq_n_f32(1f32);
     let transfer = get_neon_gamma_transfer(transfer_function);
     let v_scale_alpha = vdupq_n_f32(255f32);
-    let (mut r_f32, mut g_f32, mut b_f32, mut a_f32);
     let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
-    match image_configuration {
-        ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
-            let rgba_pixels = vld4q_f32(src);
-            if image_configuration == ImageConfiguration::Rgba {
-                r_f32 = rgba_pixels.0;
-                g_f32 = rgba_pixels.1;
-                b_f32 = rgba_pixels.2;
-            } else {
-                r_f32 = rgba_pixels.2;
-                g_f32 = rgba_pixels.1;
-                b_f32 = rgba_pixels.0;
-            }
-            a_f32 = rgba_pixels.3;
-        }
-        ImageConfiguration::Bgr | ImageConfiguration::Rgb => {
-            let rgb_pixels = vld3q_f32(src);
-            if image_configuration == ImageConfiguration::Rgb {
-                r_f32 = rgb_pixels.0;
-                g_f32 = rgb_pixels.1;
-                b_f32 = rgb_pixels.2;
-            } else {
-                r_f32 = rgb_pixels.2;
-                g_f32 = rgb_pixels.1;
-                b_f32 = rgb_pixels.0;
-            }
-            a_f32 = d_alpha;
-        }
-    }
+    let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) =
+        load_f32_and_deinterleave!(src, image_configuration);
 
     r_f32 = transfer(r_f32);
     g_f32 = transfer(g_f32);

diff --git a/src/neon/mod.rs b/src/neon/mod.rs
@@ -17,6 +17,7 @@ pub mod linear_to_planar;
 mod math;
 mod oklab_to_image;
 pub mod planar_to_linear;
+mod routines;
 mod sigmoidal;
 mod to_linear;
 mod to_linear_u8;

diff --git a/src/neon/oklab_to_image.rs b/src/neon/oklab_to_image.rs
@@ -7,7 +7,7 @@
 use crate::image::ImageConfiguration;
 use crate::neon::get_neon_gamma_transfer;
 use crate::neon::math::vcolorq_matrix_f32;
-use crate::TransferFunction;
+use crate::{load_f32_and_deinterleave, TransferFunction, XYZ_TO_SRGB_D65};
 use std::arch::aarch64::*;
 
 #[inline(always)]
@@ -32,49 +32,31 @@ unsafe fn neon_oklab_gamma_vld<const CHANNELS_CONFIGURATION: u8>(
     c6: float32x4_t,
     c7: float32x4_t,
     c8: float32x4_t,
+    x0: float32x4_t,
+    x1: float32x4_t,
+    x2: float32x4_t,
+    x3: float32x4_t,
+    x4: float32x4_t,
+    x5: float32x4_t,
+    x6: float32x4_t,
+    x7: float32x4_t,
+    x8: float32x4_t,
 ) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) {
-    let d_alpha = vdupq_n_f32(1f32);
     let transfer = get_neon_gamma_transfer(transfer_function);
     let v_scale_alpha = vdupq_n_f32(255f32);
-    let (mut r_f32, mut g_f32, mut b_f32, mut a_f32);
     let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
-    match image_configuration {
-        ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
-            let rgba_pixels = vld4q_f32(src);
-            if image_configuration == ImageConfiguration::Rgba {
-                r_f32 = rgba_pixels.0;
-                g_f32 = rgba_pixels.1;
-                b_f32 = rgba_pixels.2;
-            } else {
-                r_f32 = rgba_pixels.2;
-                g_f32 = rgba_pixels.1;
-                b_f32 = rgba_pixels.0;
-            }
-            a_f32 = rgba_pixels.3;
-        }
-        ImageConfiguration::Bgr | ImageConfiguration::Rgb => {
-            let rgb_pixels = vld3q_f32(src);
-            if image_configuration == ImageConfiguration::Rgb {
-                r_f32 = rgb_pixels.0;
-                g_f32 = rgb_pixels.1;
-                b_f32 = rgb_pixels.2;
-            } else {
-                r_f32 = rgb_pixels.2;
-                g_f32 = rgb_pixels.1;
-                b_f32 = rgb_pixels.0;
-            }
-            a_f32 = d_alpha;
-        }
-    }
-
+    let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) =
+        load_f32_and_deinterleave!(src, image_configuration);
     let (mut l_l, mut l_m, mut l_s) =
         vcolorq_matrix_f32(r_f32, g_f32, b_f32, m0, m1, m2, m3, m4, m5, m6, m7, m8);
 
     l_l = vmulq_f32(vmulq_f32(l_l, l_l), l_l);
     l_m = vmulq_f32(vmulq_f32(l_m, l_m), l_m);
     l_s = vmulq_f32(vmulq_f32(l_s, l_s), l_s);
 
-    let (r_l, g_l, b_l) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8);
+    let (x, y, z) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8);
+
+    let (r_l, g_l, b_l) = vcolorq_matrix_f32(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8);
 
     r_f32 = transfer(r_l);
     g_f32 = transfer(g_l);
@@ -107,6 +89,19 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
     let channels = image_configuration.get_channels_count();
     let mut cx = start_cx;
 
+    // Matrix from XYZ
+    let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = (
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(0)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(1)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(2)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(0)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(1)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(2)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(0)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(1)),
+        vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(2)),
+    );
+
     let (m0, m1, m2, m3, m4, m5, m6, m7, m8) = (
         vdupq_n_f32(1f32),
         vdupq_n_f32(0.3963377774f32),
@@ -158,6 +153,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
             c6,
             c7,
             c8,
+            x0,
+            x1,
+            x2,
+            x3,
+            x4,
+            x5,
+            x6,
+            x7,
+            x8,
         );
 
         let src_ptr_1 = offset_src_ptr.add(4 * channels);
@@ -183,6 +187,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
             c6,
             c7,
             c8,
+            x0,
+            x1,
+            x2,
+            x3,
+            x4,
+            x5,
+            x6,
+            x7,
+            x8,
         );
 
         let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels);
@@ -208,6 +221,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
             c6,
             c7,
             c8,
+            x0,
+            x1,
+            x2,
+            x3,
+            x4,
+            x5,
+            x6,
+            x7,
+            x8,
         );
 
         let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels);
@@ -233,6 +255,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
             c6,
             c7,
             c8,
+            x0,
+            x1,
+            x2,
+            x3,
+            x4,
+            x5,
+            x6,
+            x7,
+            x8,
         );
 
         let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));

diff --git a/src/neon/routines.rs b/src/neon/routines.rs
@@ -0,0 +1,43 @@
+/*
+ * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved.
+ * //
+ * // Use of this source code is governed by a BSD-style
+ * // license that can be found in the LICENSE file.
+ */
+
+#[macro_export] // exported at the crate root; call sites in this crate import it as `crate::load_f32_and_deinterleave`
+macro_rules! load_f32_and_deinterleave { // loads f32 pixels through vld3q/vld4q and yields per-channel vectors as `(r, g, b, a)`
+    ($ptr: expr, $image_configuration: expr) => {{ // `$ptr`: pointer passed to the load intrinsic; `$image_configuration`: channel layout selector
+        let d_alpha = vdupq_n_f32(1f32); // alpha stand-in (1.0 in every lane) used when the layout has no alpha channel
+        let (r_f32, g_f32, b_f32, a_f32); // deferred initialisation: each is assigned exactly once in the branch that runs
+        match $image_configuration { // NOTE: `$image_configuration` is expanded again in the `if`s below, so pass a plain variable rather than a side-effecting expression
+            ImageConfiguration::Rgba | ImageConfiguration::Bgra => { // four-channel layouts; `ImageConfiguration` must be in scope at the call site
+                let rgba_pixels = vld4q_f32($ptr); // result fields .0..=.3 hold the four channels in memory order
+                if $image_configuration == ImageConfiguration::Rgba {
+                    r_f32 = rgba_pixels.0; // RGBA: memory order already matches r, g, b
+                    g_f32 = rgba_pixels.1;
+                    b_f32 = rgba_pixels.2;
+                } else {
+                    r_f32 = rgba_pixels.2; // BGRA: first and third channels are swapped so the output is always r, g, b
+                    g_f32 = rgba_pixels.1;
+                    b_f32 = rgba_pixels.0;
+                }
+                a_f32 = rgba_pixels.3; // alpha is the fourth channel in both four-channel layouts
+            }
+            ImageConfiguration::Bgr | ImageConfiguration::Rgb => { // three-channel layouts
+                let rgb_pixels = vld3q_f32($ptr); // result fields .0..=.2 hold the three channels in memory order
+                if $image_configuration == ImageConfiguration::Rgb {
+                    r_f32 = rgb_pixels.0; // RGB: memory order already matches r, g, b
+                    g_f32 = rgb_pixels.1;
+                    b_f32 = rgb_pixels.2;
+                } else {
+                    r_f32 = rgb_pixels.2; // BGR: first and third channels are swapped
+                    g_f32 = rgb_pixels.1;
+                    b_f32 = rgb_pixels.0;
+                }
+                a_f32 = d_alpha; // no alpha in the source data: substitute the constant 1.0 vector
+            }
+        }
+        (r_f32, g_f32, b_f32, a_f32) // value of the macro expression, always ordered r, g, b, a
+    }};
+}
README.md (rendered diff view; repeats the README.md hunk shown earlier on this page)
@@ -7,6 +7,7 @@ Allows conversion between
     - [x] HSV
     - [x] LAB
     - [x] LUV
+    - [x] LCh
     - [x] XYZ
     - [x] Sigmoidal
     - [x] Oklab
@@ Expand Down @@