From f3ec079473ba79a88df6b6635dd0f0df0885ca07 Mon Sep 17 00:00:00 2001 From: awxkee Date: Sat, 20 Jul 2024 21:25:05 +0100 Subject: [PATCH] Fixing Oklab --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 1 + src/neon/image_to_oklab.rs | 44 +++++++++++++---- src/neon/linear_to_image.rs | 34 ++----------- src/neon/mod.rs | 1 + src/neon/oklab_to_image.rs | 97 ++++++++++++++++++++++++------------- src/neon/routines.rs | 43 ++++++++++++++++ src/oklab.rs | 12 +++-- src/sse/image_to_oklab.rs | 44 +++++++++++++---- src/sse/oklab_to_image.rs | 64 +++++++++++++++++++++++- src/xyz.rs | 43 ++++++++++++++++ 12 files changed, 297 insertions(+), 90 deletions(-) create mode 100644 src/neon/routines.rs diff --git a/Cargo.lock b/Cargo.lock index 6c421d1..439a409 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,7 +163,7 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorutils-rs" -version = "0.4.15" +version = "0.4.16" dependencies = [ "erydanos", "half", diff --git a/Cargo.toml b/Cargo.toml index 3c6c442..5bafbd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] } [package] name = "colorutils-rs" -version = "0.4.15" +version = "0.4.16" edition = "2021" description = "High performance utilities for color format handling and conversion." 
readme = "README.md" diff --git a/README.md b/README.md index c6d649d..ff6ce74 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Allows conversion between - [x] HSV - [x] LAB - [x] LUV +- [x] LCh - [x] XYZ - [x] Sigmoidal - [x] Oklab diff --git a/src/neon/image_to_oklab.rs b/src/neon/image_to_oklab.rs index ada1f50..8d64aab 100644 --- a/src/neon/image_to_oklab.rs +++ b/src/neon/image_to_oklab.rs @@ -7,12 +7,13 @@ use crate::image::ImageConfiguration; use crate::neon::get_neon_linear_transfer; use crate::neon::math::vcolorq_matrix_f32; -use crate::TransferFunction; +use crate::{TransferFunction, SRGB_TO_XYZ_D65}; use erydanos::vcbrtq_fast_f32; use std::arch::aarch64::*; macro_rules! triple_to_oklab { ($r: expr, $g: expr, $b: expr, $transfer: expr, + $x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr, $c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr, $m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr ) => {{ @@ -23,10 +24,13 @@ macro_rules! 
triple_to_oklab { let dl_m = $transfer(g_f); let dl_s = $transfer(b_f); - let (l_l, l_m, l_s) = vcolorq_matrix_f32( - dl_l, dl_m, dl_s, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8, + let (x, y, z) = vcolorq_matrix_f32( + dl_l, dl_m, dl_s, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, ); + let (l_l, l_m, l_s) = + vcolorq_matrix_f32(x, y, z, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8); + let l_ = vcbrtq_fast_f32(l_l); let m_ = vcbrtq_fast_f32(l_m); let s_ = vcbrtq_fast_f32(l_s); @@ -54,6 +58,19 @@ pub unsafe fn neon_image_to_oklab( let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32; + // Matrix To XYZ + let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(2)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(0)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(1)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(2)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(0)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(1)), + vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(2)), + ); + let (c0, c1, c2, c3, c4, c5, c6, c7, c8) = ( vdupq_n_f32(0.4122214708f32), vdupq_n_f32(0.5363325363f32), @@ -120,8 +137,8 @@ pub unsafe fn neon_image_to_oklab( let b_low_low = vmovl_u16(vget_low_u16(b_low)); let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!( - r_low_low, g_low_low, b_low_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, - m2, m3, m4, m5, m6, m7, m8 + r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, + c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_low = vmovl_u8(vget_low_u8(a_chan)); @@ -141,8 +158,8 @@ pub unsafe fn neon_image_to_oklab( let b_low_high = vmovl_high_u16(b_low); let (x_low_high, y_low_high, z_low_high) = 
triple_to_oklab!( - r_low_high, g_low_high, b_low_high, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, - m1, m2, m3, m4, m5, m6, m7, m8 + r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); if image_configuration.has_alpha() { @@ -163,8 +180,8 @@ pub unsafe fn neon_image_to_oklab( let b_high_low = vmovl_u16(vget_low_u16(b_high)); let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!( - r_high_low, g_high_low, b_high_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, - m1, m2, m3, m4, m5, m6, m7, m8 + r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_high = vmovl_high_u8(a_chan); @@ -191,6 +208,15 @@ pub unsafe fn neon_image_to_oklab( g_high_high, b_high_high, &transfer, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, c0, c1, c2, diff --git a/src/neon/linear_to_image.rs b/src/neon/linear_to_image.rs index 0656e60..8931ae4 100644 --- a/src/neon/linear_to_image.rs +++ b/src/neon/linear_to_image.rs @@ -7,7 +7,7 @@ use crate::image::ImageConfiguration; use crate::neon::*; -use crate::TransferFunction; +use crate::{load_f32_and_deinterleave, TransferFunction}; use std::arch::aarch64::*; #[inline(always)] @@ -15,39 +15,11 @@ unsafe fn neon_gamma_vld (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) { - let d_alpha = vdupq_n_f32(1f32); let transfer = get_neon_gamma_transfer(transfer_function); let v_scale_alpha = vdupq_n_f32(255f32); - let (mut r_f32, mut g_f32, mut b_f32, mut a_f32); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - match image_configuration { - ImageConfiguration::Rgba | ImageConfiguration::Bgra => { - let rgba_pixels = vld4q_f32(src); - if image_configuration == ImageConfiguration::Rgba { - r_f32 = rgba_pixels.0; - g_f32 = rgba_pixels.1; - b_f32 = rgba_pixels.2; - } else { - r_f32 = 
rgba_pixels.2; - g_f32 = rgba_pixels.1; - b_f32 = rgba_pixels.0; - } - a_f32 = rgba_pixels.3; - } - ImageConfiguration::Bgr | ImageConfiguration::Rgb => { - let rgb_pixels = vld3q_f32(src); - if image_configuration == ImageConfiguration::Rgb { - r_f32 = rgb_pixels.0; - g_f32 = rgb_pixels.1; - b_f32 = rgb_pixels.2; - } else { - r_f32 = rgb_pixels.2; - g_f32 = rgb_pixels.1; - b_f32 = rgb_pixels.0; - } - a_f32 = d_alpha; - } - } + let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) = + load_f32_and_deinterleave!(src, image_configuration); r_f32 = transfer(r_f32); g_f32 = transfer(g_f32); diff --git a/src/neon/mod.rs b/src/neon/mod.rs index 1ef58d8..4e57171 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -17,6 +17,7 @@ pub mod linear_to_planar; mod math; mod oklab_to_image; pub mod planar_to_linear; +mod routines; mod sigmoidal; mod to_linear; mod to_linear_u8; diff --git a/src/neon/oklab_to_image.rs b/src/neon/oklab_to_image.rs index 7ef5cb6..14d0578 100644 --- a/src/neon/oklab_to_image.rs +++ b/src/neon/oklab_to_image.rs @@ -7,7 +7,7 @@ use crate::image::ImageConfiguration; use crate::neon::get_neon_gamma_transfer; use crate::neon::math::vcolorq_matrix_f32; -use crate::TransferFunction; +use crate::{load_f32_and_deinterleave, TransferFunction, XYZ_TO_SRGB_D65}; use std::arch::aarch64::*; #[inline(always)] @@ -32,41 +32,21 @@ unsafe fn neon_oklab_gamma_vld( c6: float32x4_t, c7: float32x4_t, c8: float32x4_t, + x0: float32x4_t, + x1: float32x4_t, + x2: float32x4_t, + x3: float32x4_t, + x4: float32x4_t, + x5: float32x4_t, + x6: float32x4_t, + x7: float32x4_t, + x8: float32x4_t, ) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) { - let d_alpha = vdupq_n_f32(1f32); let transfer = get_neon_gamma_transfer(transfer_function); let v_scale_alpha = vdupq_n_f32(255f32); - let (mut r_f32, mut g_f32, mut b_f32, mut a_f32); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - match image_configuration { - ImageConfiguration::Rgba | 
ImageConfiguration::Bgra => { - let rgba_pixels = vld4q_f32(src); - if image_configuration == ImageConfiguration::Rgba { - r_f32 = rgba_pixels.0; - g_f32 = rgba_pixels.1; - b_f32 = rgba_pixels.2; - } else { - r_f32 = rgba_pixels.2; - g_f32 = rgba_pixels.1; - b_f32 = rgba_pixels.0; - } - a_f32 = rgba_pixels.3; - } - ImageConfiguration::Bgr | ImageConfiguration::Rgb => { - let rgb_pixels = vld3q_f32(src); - if image_configuration == ImageConfiguration::Rgb { - r_f32 = rgb_pixels.0; - g_f32 = rgb_pixels.1; - b_f32 = rgb_pixels.2; - } else { - r_f32 = rgb_pixels.2; - g_f32 = rgb_pixels.1; - b_f32 = rgb_pixels.0; - } - a_f32 = d_alpha; - } - } - + let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) = + load_f32_and_deinterleave!(src, image_configuration); let (mut l_l, mut l_m, mut l_s) = vcolorq_matrix_f32(r_f32, g_f32, b_f32, m0, m1, m2, m3, m4, m5, m6, m7, m8); @@ -74,7 +54,9 @@ unsafe fn neon_oklab_gamma_vld( l_m = vmulq_f32(vmulq_f32(l_m, l_m), l_m); l_s = vmulq_f32(vmulq_f32(l_s, l_s), l_s); - let (r_l, g_l, b_l) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8); + let (x, y, z) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8); + + let (r_l, g_l, b_l) = vcolorq_matrix_f32(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); r_f32 = transfer(r_l); g_f32 = transfer(g_l); @@ -107,6 +89,19 @@ pub unsafe fn neon_oklab_to_image( let channels = image_configuration.get_channels_count(); let mut cx = start_cx; + // Matrix from XYZ + let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(0)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(1)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(2)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(0)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(1)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(2)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(0)), + 
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(1)), + vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(2)), + ); + let (m0, m1, m2, m3, m4, m5, m6, m7, m8) = ( vdupq_n_f32(1f32), vdupq_n_f32(0.3963377774f32), @@ -158,6 +153,15 @@ pub unsafe fn neon_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_1 = offset_src_ptr.add(4 * channels); @@ -183,6 +187,15 @@ pub unsafe fn neon_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels); @@ -208,6 +221,15 @@ pub unsafe fn neon_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels); @@ -233,6 +255,15 @@ pub unsafe fn neon_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_)); diff --git a/src/neon/routines.rs b/src/neon/routines.rs new file mode 100644 index 0000000..98e6be8 --- /dev/null +++ b/src/neon/routines.rs @@ -0,0 +1,43 @@ +/* + * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. + * // + * // Use of this source code is governed by a BSD-style + * // license that can be found in the LICENSE file. + */ + +#[macro_export] +macro_rules! 
load_f32_and_deinterleave { + ($ptr: expr, $image_configuration: expr) => {{ + let d_alpha = vdupq_n_f32(1f32); + let (r_f32, g_f32, b_f32, a_f32); + match $image_configuration { + ImageConfiguration::Rgba | ImageConfiguration::Bgra => { + let rgba_pixels = vld4q_f32($ptr); + if $image_configuration == ImageConfiguration::Rgba { + r_f32 = rgba_pixels.0; + g_f32 = rgba_pixels.1; + b_f32 = rgba_pixels.2; + } else { + r_f32 = rgba_pixels.2; + g_f32 = rgba_pixels.1; + b_f32 = rgba_pixels.0; + } + a_f32 = rgba_pixels.3; + } + ImageConfiguration::Bgr | ImageConfiguration::Rgb => { + let rgb_pixels = vld3q_f32($ptr); + if $image_configuration == ImageConfiguration::Rgb { + r_f32 = rgb_pixels.0; + g_f32 = rgb_pixels.1; + b_f32 = rgb_pixels.2; + } else { + r_f32 = rgb_pixels.2; + g_f32 = rgb_pixels.1; + b_f32 = rgb_pixels.0; + } + a_f32 = d_alpha; + } + } + (r_f32, g_f32, b_f32, a_f32) + }}; +} diff --git a/src/oklab.rs b/src/oklab.rs index 42abf01..65a9550 100644 --- a/src/oklab.rs +++ b/src/oklab.rs @@ -6,6 +6,7 @@ */ use crate::{ srgb_from_linear, srgb_to_linear, EuclideanDistance, Rgb, TaxicabDistance, TransferFunction, + Xyz, SRGB_TO_XYZ_D65, XYZ_TO_SRGB_D65, }; use erydanos::ehypot3f; @@ -77,9 +78,11 @@ impl Oklab { #[inline] fn linear_rgb_to_oklab(rgb: Rgb) -> Oklab { - let l = 0.4122214708f32 * rgb.r + 0.5363325363f32 * rgb.g + 0.0514459929f32 * rgb.b; - let m = 0.2119034982f32 * rgb.r + 0.6806995451f32 * rgb.g + 0.1073969566f32 * rgb.b; - let s = 0.0883024619f32 * rgb.r + 0.2817188376f32 * rgb.g + 0.6299787005f32 * rgb.b; + let xyz = Xyz::from_linear_rgb(&rgb, &SRGB_TO_XYZ_D65); + + let l = 0.4122214708f32 * xyz.x + 0.5363325363f32 * xyz.y + 0.0514459929f32 * xyz.z; + let m = 0.2119034982f32 * xyz.x + 0.6806995451f32 * xyz.y + 0.1073969566f32 * xyz.z; + let s = 0.0883024619f32 * xyz.x + 0.2817188376f32 * xyz.y + 0.6299787005f32 * xyz.z; let l_ = l.cbrt(); let m_ = m.cbrt(); @@ -103,11 +106,12 @@ impl Oklab { let m = m_ * m_ * m_; let s = s_ * s_ * s_; - return 
Rgb::::new( + let xyz = Xyz::new( 4.0767416621f32 * l - 3.3077115913f32 * m + 0.2309699292f32 * s, -1.2684380046f32 * l + 2.6097574011f32 * m - 0.3413193965f32 * s, -0.0041960863f32 * l - 0.7034186147f32 * m + 1.7076147010f32 * s, ); + xyz.to_linear_rgb(&XYZ_TO_SRGB_D65) } } diff --git a/src/sse/image_to_oklab.rs b/src/sse/image_to_oklab.rs index 177a7f8..5427643 100644 --- a/src/sse/image_to_oklab.rs +++ b/src/sse/image_to_oklab.rs @@ -11,7 +11,7 @@ use crate::sse::{ }; use crate::{ load_u8_and_deinterleave, store_and_interleave_v3_f32, store_and_interleave_v4_f32, - TransferFunction, + TransferFunction, SRGB_TO_XYZ_D65, }; use erydanos::_mm_cbrt_fast_ps; #[cfg(target_arch = "x86")] @@ -21,6 +21,7 @@ use std::arch::x86_64::*; macro_rules! triple_to_oklab { ($r: expr, $g: expr, $b: expr, $transfer: expr, + $x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr, $c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr, $m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr ) => {{ @@ -32,10 +33,13 @@ macro_rules! 
triple_to_oklab { let g_linear = $transfer(g_f); let b_linear = $transfer(b_f); - let (l_l, l_m, l_s) = _mm_color_matrix_ps( - r_linear, g_linear, b_linear, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8, + let (x, y, z) = _mm_color_matrix_ps( + r_linear, g_linear, b_linear, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, ); + let (l_l, l_m, l_s) = + _mm_color_matrix_ps(x, y, z, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8); + let l_ = _mm_cbrt_fast_ps(l_l); let m_ = _mm_cbrt_fast_ps(l_m); let s_ = _mm_cbrt_fast_ps(l_s); @@ -64,6 +68,19 @@ pub unsafe fn sse_image_to_oklab( let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32; + // Matrix To XYZ + let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(2)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(0)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(1)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(2)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(0)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(1)), + _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(2)), + ); + let (c0, c1, c2, c3, c4, c5, c6, c7, c8) = ( _mm_set1_ps(0.4122214708f32), _mm_set1_ps(0.5363325363f32), @@ -102,8 +119,8 @@ pub unsafe fn sse_image_to_oklab( let b_low_low = _mm_cvtepu16_epi32(b_low); let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!( - r_low_low, g_low_low, b_low_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, - m2, m3, m4, m5, m6, m7, m8 + r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, + c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_low = _mm_cvtepu8_epi16(a_chan); @@ -124,8 +141,8 @@ pub unsafe fn sse_image_to_oklab( let b_low_high = 
_mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); let (x_low_high, y_low_high, z_low_high) = triple_to_oklab!( - r_low_high, g_low_high, b_low_high, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, - m1, m2, m3, m4, m5, m6, m7, m8 + r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); if image_configuration.has_alpha() { @@ -150,8 +167,8 @@ pub unsafe fn sse_image_to_oklab( let b_high_low = _mm_cvtepu16_epi32(b_high); let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!( - r_high_low, g_high_low, b_high_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, - m1, m2, m3, m4, m5, m6, m7, m8 + r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(a_chan)); @@ -174,6 +191,15 @@ pub unsafe fn sse_image_to_oklab( g_high_high, b_high_high, &transfer, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, c0, c1, c2, diff --git a/src/sse/oklab_to_image.rs b/src/sse/oklab_to_image.rs index 55a41cb..080ec9e 100644 --- a/src/sse/oklab_to_image.rs +++ b/src/sse/oklab_to_image.rs @@ -11,7 +11,7 @@ use crate::sse::{ }; use crate::{ load_f32_and_deinterleave, store_and_interleave_v3_u8, store_and_interleave_v4_u8, - TransferFunction, + TransferFunction, XYZ_TO_SRGB_D65, }; #[cfg(target_arch = "x86")] use std::arch::x86::*; @@ -40,6 +40,15 @@ unsafe fn sse_oklab_vld( c6: __m128, c7: __m128, c8: __m128, + x0: __m128, + x1: __m128, + x2: __m128, + x3: __m128, + x4: __m128, + x5: __m128, + x6: __m128, + x7: __m128, + x8: __m128, ) -> (__m128i, __m128i, __m128i, __m128i) { let transfer = get_sse_gamma_transfer(transfer_function); let v_scale_alpha = _mm_set1_ps(255f32); @@ -55,7 +64,9 @@ unsafe fn sse_oklab_vld( l_m = _mm_cube_ps(l_m); l_s = _mm_cube_ps(l_s); - let (r_l, g_l, b_l) = _mm_color_matrix_ps(l_l, l_m, l_s, 
c0, c1, c2, c3, c4, c5, c6, c7, c8); + let (x, y, z) = _mm_color_matrix_ps(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8); + + let (r_l, g_l, b_l) = _mm_color_matrix_ps(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); r_f32 = transfer(r_l); g_f32 = transfer(g_l); @@ -100,6 +111,19 @@ pub unsafe fn sse_oklab_to_image( let channels = image_configuration.get_channels_count(); let mut cx = start_cx; + // Matrix from XYZ + let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(0)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(1)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(2)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(0)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(1)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(2)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(0)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(1)), + _mm_set1_ps(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(2)), + ); + let (m0, m1, m2, m3, m4, m5, m6, m7, m8) = ( _mm_set1_ps(1f32), _mm_set1_ps(0.3963377774f32), @@ -151,6 +175,15 @@ pub unsafe fn sse_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_1 = offset_src_ptr.add(4 * channels); @@ -176,6 +209,15 @@ pub unsafe fn sse_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels); @@ -201,6 +243,15 @@ pub unsafe fn sse_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels); @@ -226,6 +277,15 @@ pub unsafe fn sse_oklab_to_image( c6, c7, c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, ); let r_row01 = _mm_packus_epi32(r_row0_, r_row1_); diff --git a/src/xyz.rs b/src/xyz.rs index db68fa9..c86236d 100644 --- a/src/xyz.rs +++ b/src/xyz.rs @@ -80,6 +80,27 @@ 
impl Xyz { } } + /// This function converts from linear RGB components to XYZ + /// # Arguments + /// * `matrix` - Transformation matrix from RGB to XYZ, for example `SRGB_TO_XYZ_D65` + /// * `rgb` - Linear RGB components; no transfer function is applied here + #[inline] + pub fn from_linear_rgb(rgb: &Rgb, matrix: &[[f32; 3]; 3]) -> Self { + unsafe { + Self::new( + (*(*matrix.get_unchecked(0)).get_unchecked(0)) * rgb.r + + (*(*matrix.get_unchecked(0)).get_unchecked(1)) * rgb.g + + (*(*matrix.get_unchecked(0)).get_unchecked(2)) * rgb.b, + (*(*matrix.get_unchecked(1)).get_unchecked(0)) * rgb.r + + (*(*matrix.get_unchecked(1)).get_unchecked(1)) * rgb.g + + (*(*matrix.get_unchecked(1)).get_unchecked(2)) * rgb.b, + (*(*matrix.get_unchecked(2)).get_unchecked(0)) * rgb.r + + (*(*matrix.get_unchecked(2)).get_unchecked(1)) * rgb.g + + (*(*matrix.get_unchecked(2)).get_unchecked(2)) * rgb.b, + ) + } + } + pub fn scaled(&self) -> (f32, f32, f32) { (self.x * 100f32, self.y * 100f32, self.z * 100f32) } @@ -117,6 +138,28 @@ impl Xyz { Rgb::new(r as u8, g as u8, b as u8) } } + + /// This function converts XYZ to linear RGB + /// # Arguments + /// * `matrix` - Transformation matrix from XYZ to RGB, for example `XYZ_TO_SRGB_D65` + #[inline] + pub fn to_linear_rgb(&self, matrix: &[[f32; 3]; 3]) -> Rgb { + let x = self.x; + let y = self.y; + let z = self.z; + unsafe { + let r = x * (*(*matrix.get_unchecked(0)).get_unchecked(0)) + + y * (*(*matrix.get_unchecked(0)).get_unchecked(1)) + + z * (*(*matrix.get_unchecked(0)).get_unchecked(2)); + let g = x * (*(*matrix.get_unchecked(1)).get_unchecked(0)) + + y * (*(*matrix.get_unchecked(1)).get_unchecked(1)) + + z * (*(*matrix.get_unchecked(1)).get_unchecked(2)); + let b = x * (*(*matrix.get_unchecked(2)).get_unchecked(0)) + + y * (*(*matrix.get_unchecked(2)).get_unchecked(1)) + + z * (*(*matrix.get_unchecked(2)).get_unchecked(2)); + Rgb::::new(r, g, b) + } + } } impl EuclideanDistance for Xyz {