diff --git a/Cargo.lock b/Cargo.lock index 1f2fb71..56485d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,7 +163,7 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorutils-rs" -version = "0.5.2" +version = "0.5.3" dependencies = [ "erydanos", "half", diff --git a/Cargo.toml b/Cargo.toml index f06bbdc..f8f5f48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] } [package] name = "colorutils-rs" -version = "0.5.2" +version = "0.5.3" edition = "2021" description = "High performance utilities for color format handling and conversion." readme = "README.md" diff --git a/README.md b/README.md index 1b8e232..ad86368 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Allows conversion between - [x] XYZ - [x] Sigmoidal - [x] Oklab +- [x] Oklch - [x] Jzazbz - [x] Jzczhz diff --git a/src/app/src/main.rs b/src/app/src/main.rs index d7d0a92..46900dd 100644 --- a/src/app/src/main.rs +++ b/src/app/src/main.rs @@ -66,14 +66,13 @@ fn main() { lab_store.resize(width as usize * components * height as usize, 0f32); let src_stride = width * components as u32; let start_time = Instant::now(); - rgb_to_jzazbz( + rgb_to_oklch( src_bytes, src_stride, &mut lab_store, store_stride as u32, width, height, - 200f32, TransferFunction::Srgb, ); let elapsed_time = start_time.elapsed(); @@ -102,14 +101,13 @@ fn main() { // } let start_time = Instant::now(); - jzazbz_to_rgb( + oklch_to_rgb( &lab_store, store_stride as u32, &mut dst_slice, src_stride, width, height, - 200f32, TransferFunction::Srgb, ); diff --git a/src/image_to_oklab.rs b/src/image_to_oklab.rs index d96e424..3148671 100644 --- a/src/image_to_oklab.rs +++ b/src/image_to_oklab.rs @@ -10,6 +10,7 @@ use crate::image::ImageConfiguration; target_feature = "neon" ))] use crate::neon::neon_image_to_oklab; +use crate::oklch::Oklch; #[cfg(all( any(target_arch = "x86_64", target_arch = "x86"), target_feature = "sse4.1" @@ -17,8 +18,26 @@ use crate::neon::neon_image_to_oklab; use crate::sse::sse_image_to_oklab; use crate::{Oklab, Rgb, TransferFunction}; +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub(crate) enum OklabTarget { + OKLAB = 0, + OKLCH = 1, +} + +impl From for OklabTarget { + fn from(value: u8) -> Self { + match value { + 0 => OklabTarget::OKLAB, + 1 => OklabTarget::OKLCH, + _ => { + panic!("Not implemented") + } + } + } +} + #[inline(always)] -fn channels_to_oklab( +fn channels_to_oklab( src: &[u8], src_stride: u32, dst: &mut [f32], @@ -27,6 +46,7 @@ fn channels_to_oklab( height: u32, transfer_function: TransferFunction, ) { + let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let channels = image_configuration.get_channels_count(); @@ -40,7 +60,7 @@ fn channels_to_oklab( target_feature = "neon" ))] { - _wide_row_handle = Some(neon_image_to_oklab::); + _wide_row_handle = Some(neon_image_to_oklab::); } #[cfg(all( @@ -48,7 +68,7 @@ fn channels_to_oklab( target_feature = "sse4.1" ))] if is_x86_feature_detected!("sse4.1") { - _wide_row_handle = Some(sse_image_to_oklab::); + _wide_row_handle = Some(sse_image_to_oklab::); } let mut src_offset = 0usize; @@ -92,14 +112,25 @@ fn channels_to_oklab( }; let rgb = Rgb::::new(r, g, b); - let oklab = Oklab::from_rgb(rgb, transfer_function); - let dst_store = unsafe { dst_ptr.add(px) }; - unsafe { - dst_store.write_unaligned(oklab.l); - dst_store.add(1).write_unaligned(oklab.a); - dst_store.add(2).write_unaligned(oklab.b); + match target { + OklabTarget::OKLAB => { + let oklab = Oklab::from_rgb(rgb, transfer_function); + unsafe { + dst_store.write_unaligned(oklab.l); + dst_store.add(1).write_unaligned(oklab.a); + dst_store.add(2).write_unaligned(oklab.b); + } + } + OklabTarget::OKLCH => { + let oklch = Oklch::from_rgb(rgb, transfer_function); + unsafe { + dst_store.write_unaligned(oklch.l); + dst_store.add(1).write_unaligned(oklch.c); + dst_store.add(2).write_unaligned(oklch.h); + } + } } if image_configuration.has_alpha() { @@ -138,7 +169,7 @@ pub fn rgb_to_oklab( height: u32, transfer_function: TransferFunction, ) { - channels_to_oklab::<{ ImageConfiguration::Rgb as u8 }>( + channels_to_oklab::<{ ImageConfiguration::Rgb as u8 }, { OklabTarget::OKLAB as u8 }>( src, src_stride, dst, @@ -168,7 +199,7 @@ pub fn rgba_to_oklab( height: u32, transfer_function: TransferFunction, ) { - channels_to_oklab::<{ ImageConfiguration::Rgba as u8 }>( + channels_to_oklab::<{ ImageConfiguration::Rgba as u8 }, { OklabTarget::OKLAB as u8 }>( src, src_stride, dst, @@ -198,7 +229,7 @@ pub fn bgra_to_oklab( height: u32, transfer_function: TransferFunction, ) { - channels_to_oklab::<{ ImageConfiguration::Bgra as u8 }>( + channels_to_oklab::<{ ImageConfiguration::Bgra as u8 }, { OklabTarget::OKLAB as u8 }>( src, src_stride, dst, @@ -228,7 +259,127 @@ pub fn bgr_to_oklab( height: u32, transfer_function: TransferFunction, ) { - channels_to_oklab::<{ ImageConfiguration::Bgr as u8 }>( + channels_to_oklab::<{ ImageConfiguration::Bgr as u8 }, { OklabTarget::OKLAB as u8 }>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts RGB to Oklch against D65 white point. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains RGB data +/// * `src_stride` - Bytes per row for src data. +/// * `width` - Image width +/// * `height` - Image height +/// * `dst` - A mutable slice to receive LCH(a) data +/// * `dst_stride` - Bytes per row for dst data +/// * `transfer_function` - transfer function to linear colorspace +pub fn rgb_to_oklch( + src: &[u8], + src_stride: u32, + dst: &mut [f32], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + channels_to_oklab::<{ ImageConfiguration::Rgb as u8 }, { OklabTarget::OKLCH as u8 }>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts RGBA to Oklch against D65 white point and preserving and normalizing alpha channels keeping it at last positions. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains RGBA data +/// * `src_stride` - Bytes per row for src data. +/// * `width` - Image width +/// * `height` - Image height +/// * `dst` - A mutable slice to receive LCH(a) data +/// * `dst_stride` - Bytes per row for dst data +/// * `transfer_function` - transfer function to linear colorspace +pub fn rgba_to_oklch( + src: &[u8], + src_stride: u32, + dst: &mut [f32], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + channels_to_oklab::<{ ImageConfiguration::Rgba as u8 }, { OklabTarget::OKLCH as u8 }>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts BGRA to Oklch against D65 white point and preserving and normalizing alpha channels keeping it at last positions. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains BGRA data +/// * `src_stride` - Bytes per row for src data. +/// * `width` - Image width +/// * `height` - Image height +/// * `dst` - A mutable slice to receive LCH(a) data +/// * `dst_stride` - Bytes per row for dst data +/// * `transfer_function` - transfer function to linear colorspace +pub fn bgra_to_oklch( + src: &[u8], + src_stride: u32, + dst: &mut [f32], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + channels_to_oklab::<{ ImageConfiguration::Bgra as u8 }, { OklabTarget::OKLCH as u8 }>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts BGR to Oklch against D65 white point. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains BGR data +/// * `src_stride` - Bytes per row for src data. +/// * `width` - Image width +/// * `height` - Image height +/// * `dst` - A mutable slice to receive LCH(a) data +/// * `dst_stride` - Bytes per row for dst data +/// * `transfer_function` - transfer function to linear colorspace +pub fn bgr_to_oklch( + src: &[u8], + src_stride: u32, + dst: &mut [f32], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + channels_to_oklab::<{ ImageConfiguration::Bgr as u8 }, { OklabTarget::OKLCH as u8 }>( src, src_stride, dst, diff --git a/src/lib.rs b/src/lib.rs index e743bd1..828bc40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,7 @@ mod luv; mod neon; mod oklab; mod oklab_to_image; +mod oklch; pub mod planar_to_linear; mod rgb; mod rgb_expand; @@ -134,9 +135,13 @@ pub use image_to_jzazbz::rgb_to_jzczhz; pub use image_to_jzazbz::rgba_to_jzazbz; pub use image_to_jzazbz::rgba_to_jzczhz; pub use image_to_oklab::bgr_to_oklab; +pub use image_to_oklab::bgr_to_oklch; pub use image_to_oklab::bgra_to_oklab; +pub use image_to_oklab::bgra_to_oklch; pub use image_to_oklab::rgb_to_oklab; +pub use image_to_oklab::rgb_to_oklch; pub use image_to_oklab::rgba_to_oklab; +pub use image_to_oklab::rgba_to_oklch; pub use image_to_sigmoidal::bgra_to_sigmoidal; pub use image_to_sigmoidal::rgb_to_sigmoidal; pub use image_to_sigmoidal::rgba_to_sigmoidal; @@ -155,6 +160,10 @@ pub use oklab_to_image::oklab_to_bgr; pub use oklab_to_image::oklab_to_bgra; pub use oklab_to_image::oklab_to_rgb; pub use oklab_to_image::oklab_to_rgba; +pub use oklab_to_image::oklch_to_bgr; +pub use oklab_to_image::oklch_to_bgra; +pub use oklab_to_image::oklch_to_rgb; +pub use oklab_to_image::oklch_to_rgba; pub use rgb_expand::*; pub use sigmoidal::Sigmoidal; pub use sigmoidal_to_image::sigmoidal_to_bgra; diff --git a/src/neon/image_to_oklab.rs b/src/neon/image_to_oklab.rs index 0af8199..02b0ffe 100644 --- a/src/neon/image_to_oklab.rs +++ b/src/neon/image_to_oklab.rs @@ -8,11 +8,12 @@ use crate::image::ImageConfiguration; use crate::neon::get_neon_linear_transfer; use crate::neon::math::vcolorq_matrix_f32; use crate::{load_u8_and_deinterleave, TransferFunction, SRGB_TO_XYZ_D65}; -use erydanos::vcbrtq_fast_f32; +use erydanos::{vatan2q_f32, vcbrtq_fast_f32, vhypotq_fast_f32}; use std::arch::aarch64::*; +use crate::image_to_oklab::OklabTarget; macro_rules! triple_to_oklab { - ($r: expr, $g: expr, $b: expr, $transfer: expr, + ($r: expr, $g: expr, $b: expr, $transfer: expr, $target: expr, $x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr, $c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr, $m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr @@ -35,13 +36,21 @@ macro_rules! triple_to_oklab { let m_ = vcbrtq_fast_f32(l_m); let s_ = vcbrtq_fast_f32(l_s); - let (l, m, s) = vcolorq_matrix_f32(l_, m_, s_, $m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, $m8); - (l, m, s) + let (l, mut a, mut b) = vcolorq_matrix_f32(l_, m_, s_, $m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, $m8); + + if $target == OklabTarget::OKLCH { + let c = vhypotq_fast_f32(a, b); + let h = vatan2q_f32(b, a); + a = c; + b = h; + } + + (l, a, b) }}; } #[inline(always)] -pub unsafe fn neon_image_to_oklab( +pub unsafe fn neon_image_to_oklab( start_cx: usize, src: *const u8, src_offset: usize, @@ -50,6 +59,7 @@ pub unsafe fn neon_image_to_oklab( dst_offset: usize, transfer_function: TransferFunction, ) -> usize { + let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let channels = image_configuration.get_channels_count(); let mut cx = start_cx; @@ -109,7 +119,7 @@ pub unsafe fn neon_image_to_oklab( let b_low_low = vmovl_u16(vget_low_u16(b_low)); let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!( - r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, + r_low_low, g_low_low, b_low_low, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); @@ -130,7 +140,7 @@ pub unsafe fn neon_image_to_oklab( let b_low_high = vmovl_high_u16(b_low); let (x_low_high, y_low_high, z_low_high) = triple_to_oklab!( - r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + r_low_high, g_low_high, b_low_high, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); @@ -152,7 +162,7 @@ pub unsafe fn neon_image_to_oklab( let b_high_low = vmovl_u16(vget_low_u16(b_high)); let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!( - r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, + r_high_low, g_high_low, b_high_low, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); @@ -180,6 +190,7 @@ pub unsafe fn neon_image_to_oklab( g_high_high, b_high_high, &transfer, + target, x0, x1, x2, diff --git a/src/neon/oklab_to_image.rs b/src/neon/oklab_to_image.rs index 14d0578..7935d6c 100644 --- a/src/neon/oklab_to_image.rs +++ b/src/neon/oklab_to_image.rs @@ -9,9 +9,11 @@ use crate::neon::get_neon_gamma_transfer; use crate::neon::math::vcolorq_matrix_f32; use crate::{load_f32_and_deinterleave, TransferFunction, XYZ_TO_SRGB_D65}; use std::arch::aarch64::*; +use erydanos::{vcosq_f32, vsinq_f32}; +use crate::image_to_oklab::OklabTarget; #[inline(always)] -unsafe fn neon_oklab_gamma_vld( +unsafe fn neon_oklab_gamma_vld( src: *const f32, transfer_function: TransferFunction, m0: float32x4_t, @@ -42,13 +44,21 @@ unsafe fn neon_oklab_gamma_vld( x7: float32x4_t, x8: float32x4_t, ) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) { + let target: OklabTarget = TARGET.into(); let transfer = get_neon_gamma_transfer(transfer_function); let v_scale_alpha = vdupq_n_f32(255f32); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) = - load_f32_and_deinterleave!(src, image_configuration); + let (l, mut a, mut b, mut a_f32) = load_f32_and_deinterleave!(src, image_configuration); + + if target == OklabTarget::OKLCH { + let a0 = vmulq_f32(a, vcosq_f32(b)); + let b0 = vmulq_f32(a, vsinq_f32(b)); + a = a0; + b = b0; + } + let (mut l_l, mut l_m, mut l_s) = - vcolorq_matrix_f32(r_f32, g_f32, b_f32, m0, m1, m2, m3, m4, m5, m6, m7, m8); + vcolorq_matrix_f32(l, a, b, m0, m1, m2, m3, m4, m5, m6, m7, m8); l_l = vmulq_f32(vmulq_f32(l_l, l_l), l_l); l_m = vmulq_f32(vmulq_f32(l_m, l_m), l_m); @@ -58,9 +68,9 @@ unsafe fn neon_oklab_gamma_vld( let (r_l, g_l, b_l) = vcolorq_matrix_f32(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); - r_f32 = transfer(r_l); - g_f32 = transfer(g_l); - b_f32 = transfer(b_l); + let mut r_f32 = transfer(r_l); + let mut g_f32 = transfer(g_l); + let mut b_f32 = transfer(b_l); r_f32 = vmulq_f32(r_f32, v_scale_alpha); g_f32 = vmulq_f32(g_f32, v_scale_alpha); b_f32 = vmulq_f32(b_f32, v_scale_alpha); @@ -76,7 +86,7 @@ unsafe fn neon_oklab_gamma_vld( } #[inline(always)] -pub unsafe fn neon_oklab_to_image( +pub unsafe fn neon_oklab_to_image( start_cx: usize, src: *const f32, src_offset: u32, @@ -132,139 +142,143 @@ pub unsafe fn neon_oklab_to_image( let src_ptr_0 = offset_src_ptr; - let (r_row0_, g_row0_, b_row0_, a_row0_) = neon_oklab_gamma_vld::( - src_ptr_0, - transfer_function, - m0, - m1, - m2, - m3, - m4, - m5, - m6, - m7, - m8, - c0, - c1, - c2, - c3, - c4, - c5, - c6, - c7, - c8, - x0, - x1, - x2, - x3, - x4, - x5, - x6, - x7, - x8, - ); + let (r_row0_, g_row0_, b_row0_, a_row0_) = + neon_oklab_gamma_vld::( + src_ptr_0, + transfer_function, + m0, + m1, + m2, + m3, + m4, + m5, + m6, + m7, + m8, + c0, + c1, + c2, + c3, + c4, + c5, + c6, + c7, + c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + ); let src_ptr_1 = offset_src_ptr.add(4 * channels); - let (r_row1_, g_row1_, b_row1_, a_row1_) = neon_oklab_gamma_vld::( - src_ptr_1, - transfer_function, - m0, - m1, - m2, - m3, - m4, - m5, - m6, - m7, - m8, - c0, - c1, - c2, - c3, - c4, - c5, - c6, - c7, - c8, - x0, - x1, - x2, - x3, - x4, - x5, - x6, - x7, - x8, - ); + let (r_row1_, g_row1_, b_row1_, a_row1_) = + neon_oklab_gamma_vld::( + src_ptr_1, + transfer_function, + m0, + m1, + m2, + m3, + m4, + m5, + m6, + m7, + m8, + c0, + c1, + c2, + c3, + c4, + c5, + c6, + c7, + c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + ); let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels); - let (r_row2_, g_row2_, b_row2_, a_row2_) = neon_oklab_gamma_vld::( - src_ptr_2, - transfer_function, - m0, - m1, - m2, - m3, - m4, - m5, - m6, - m7, - m8, - c0, - c1, - c2, - c3, - c4, - c5, - c6, - c7, - c8, - x0, - x1, - x2, - x3, - x4, - x5, - x6, - x7, - x8, - ); + let (r_row2_, g_row2_, b_row2_, a_row2_) = + neon_oklab_gamma_vld::( + src_ptr_2, + transfer_function, + m0, + m1, + m2, + m3, + m4, + m5, + m6, + m7, + m8, + c0, + c1, + c2, + c3, + c4, + c5, + c6, + c7, + c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + ); let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels); - let (r_row3_, g_row3_, b_row3_, a_row3_) = neon_oklab_gamma_vld::( - src_ptr_3, - transfer_function, - m0, - m1, - m2, - m3, - m4, - m5, - m6, - m7, - m8, - c0, - c1, - c2, - c3, - c4, - c5, - c6, - c7, - c8, - x0, - x1, - x2, - x3, - x4, - x5, - x6, - x7, - x8, - ); + let (r_row3_, g_row3_, b_row3_, a_row3_) = + neon_oklab_gamma_vld::( + src_ptr_3, + transfer_function, + m0, + m1, + m2, + m3, + m4, + m5, + m6, + m7, + m8, + c0, + c1, + c2, + c3, + c4, + c5, + c6, + c7, + c8, + x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + ); let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_)); let g_row01 = vcombine_u16(vqmovn_u32(g_row0_), vqmovn_u32(g_row1_)); diff --git a/src/oklab_to_image.rs b/src/oklab_to_image.rs index 1deb26a..720a2cc 100644 --- a/src/oklab_to_image.rs +++ b/src/oklab_to_image.rs @@ -16,8 +16,10 @@ use crate::neon::neon_oklab_to_image; ))] use crate::sse::sse_oklab_to_image; use crate::{Oklab, TransferFunction}; +use crate::image_to_oklab::OklabTarget; +use crate::oklch::Oklch; -fn oklab_to_image( +fn oklab_to_image( src: &[f32], src_stride: u32, dst: &mut [u8], @@ -26,6 +28,7 @@ fn oklab_to_image( height: u32, transfer_function: TransferFunction, ) { + let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let mut src_offset = 0usize; @@ -40,7 +43,7 @@ fn oklab_to_image( target_feature = "sse4.1" ))] if is_x86_feature_detected!("sse4.1") { - _wide_row_handle = Some(sse_oklab_to_image::); + _wide_row_handle = Some(sse_oklab_to_image::); } #[cfg(all( @@ -48,7 +51,7 @@ fn oklab_to_image( target_feature = "neon" ))] { - _wide_row_handle = Some(neon_oklab_to_image::); + _wide_row_handle = Some(neon_oklab_to_image::); } let channels = image_configuration.get_channels_count(); @@ -79,8 +82,16 @@ fn oklab_to_image( let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; let rgb; - let oklab = Oklab::new(l_x, l_y, l_z); - rgb = oklab.to_rgb(transfer_function); + match target { + OklabTarget::OKLAB => { + let oklab = Oklab::new(l_x, l_y, l_z); + rgb = oklab.to_rgb(transfer_function); + } + OklabTarget::OKLCH => { + let oklch = Oklch::new(l_x, l_y, l_z); + rgb = oklch.to_rgb(transfer_function); + } + } unsafe { let dst = dst_ptr.add(x * channels); @@ -123,7 +134,7 @@ pub fn oklab_to_rgba( height: u32, transfer_function: TransferFunction, ) { - oklab_to_image::<{ ImageConfiguration::Rgba as u8 }>( + oklab_to_image::<{ ImageConfiguration::Rgba as u8 }, {OklabTarget::OKLAB as u8}>( src, src_stride, dst, @@ -153,7 +164,7 @@ pub fn oklab_to_rgb( height: u32, transfer_function: TransferFunction, ) { - oklab_to_image::<{ ImageConfiguration::Rgb as u8 }>( + oklab_to_image::<{ ImageConfiguration::Rgb as u8 }, {OklabTarget::OKLAB as u8}>( src, src_stride, dst, @@ -183,7 +194,7 @@ pub fn oklab_to_bgr( height: u32, transfer_function: TransferFunction, ) { - oklab_to_image::<{ ImageConfiguration::Bgr as u8 }>( + oklab_to_image::<{ ImageConfiguration::Bgr as u8 }, {OklabTarget::OKLAB as u8}>( src, src_stride, dst, @@ -213,7 +224,127 @@ pub fn oklab_to_bgra( height: u32, transfer_function: TransferFunction, ) { - oklab_to_image::<{ ImageConfiguration::Bgra as u8 }>( + oklab_to_image::<{ ImageConfiguration::Bgra as u8 }, {OklabTarget::OKLAB as u8}>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts *Oklch* with interleaved alpha channel to RGBA. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains LCH data +/// * `src_stride` - Bytes per row for src data. +/// * `dst` - A mutable slice to receive RGBA data +/// * `dst_stride` - Bytes per row for dst data +/// * `width` - Image width +/// * `height` - Image height +/// * `transfer_function` - Transfer function from linear colorspace to gamma +pub fn oklch_to_rgba( + src: &[f32], + src_stride: u32, + dst: &mut [u8], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + oklab_to_image::<{ ImageConfiguration::Rgba as u8 }, {OklabTarget::OKLCH as u8}>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts *Oklch* to RGB. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains LCH data +/// * `src_stride` - Bytes per row for src data. +/// * `dst` - A mutable slice to receive RGB data +/// * `dst_stride` - Bytes per row for dst data +/// * `width` - Image width +/// * `height` - Image height +/// * `transfer_function` - Transfer function from linear colorspace to gamma +pub fn oklch_to_rgb( + src: &[f32], + src_stride: u32, + dst: &mut [u8], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + oklab_to_image::<{ ImageConfiguration::Rgb as u8 }, {OklabTarget::OKLCH as u8}>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts *Oklch* to BGR. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains LCH data +/// * `src_stride` - Bytes per row for src data. +/// * `dst` - A mutable slice to receive BGR data +/// * `dst_stride` - Bytes per row for dst data +/// * `width` - Image width +/// * `height` - Image height +/// * `transfer_function` - Transfer function from linear colorspace to gamma +pub fn oklch_to_bgr( + src: &[f32], + src_stride: u32, + dst: &mut [u8], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + oklab_to_image::<{ ImageConfiguration::Bgr as u8 }, {OklabTarget::OKLCH as u8}>( + src, + src_stride, + dst, + dst_stride, + width, + height, + transfer_function, + ); +} + +/// This function converts *Oklch* with interleaved alpha channel to BGRA. This is much more effective than naive direct transformation +/// +/// # Arguments +/// * `src` - A slice contains LCH data +/// * `src_stride` - Bytes per row for src data. +/// * `dst` - A mutable slice to receive BGRA data +/// * `dst_stride` - Bytes per row for dst data +/// * `width` - Image width +/// * `height` - Image height +/// * `transfer_function` - Transfer function from linear colorspace to gamma +pub fn oklch_to_bgra( + src: &[f32], + src_stride: u32, + dst: &mut [u8], + dst_stride: u32, + width: u32, + height: u32, + transfer_function: TransferFunction, +) { + oklab_to_image::<{ ImageConfiguration::Bgra as u8 }, {OklabTarget::OKLCH as u8}>( src, src_stride, dst, diff --git a/src/oklch.rs b/src/oklch.rs new file mode 100644 index 0000000..645c4dd --- /dev/null +++ b/src/oklch.rs @@ -0,0 +1,64 @@ +/* + * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. + * // + * // Use of this source code is governed by a BSD-style + * // license that can be found in the LICENSE file. + */ +use crate::{Oklab, Rgb, TransferFunction}; +use erydanos::{eatan2f, ehypotf, Cosine, Sine}; + +/// Represents *Oklch* colorspace +#[derive(Copy, Clone, PartialOrd, PartialEq)] +pub struct Oklch { + /// Lightness + pub l: f32, + /// Chroma + pub c: f32, + /// Hue + pub h: f32, +} + +impl Oklch { + /// Creates new instance + #[inline] + pub fn new(l: f32, c: f32, h: f32) -> Oklch { + Oklch { l, c, h } + } + + /// Converts *Rgb* into *Oklch* + /// + /// # Arguments + /// `transfer_function` - Transfer function into linear colorspace and its inverse + #[inline] + pub fn from_rgb(rgb: Rgb, transfer_function: TransferFunction) -> Oklch { + let oklab = rgb.to_oklab(transfer_function); + Oklch::from_oklab(oklab) + } + + /// Converts *Oklch* into *Rgb* + /// + /// # Arguments + /// `transfer_function` - Transfer function into linear colorspace and its inverse + #[inline] + pub fn to_rgb(&self, transfer_function: TransferFunction) -> Rgb { + let oklab = self.to_oklab(); + oklab.to_rgb(transfer_function) + } + + /// Converts *Oklab* to *Oklch* + #[inline] + pub fn from_oklab(oklab: Oklab) -> Oklch { + let chroma = ehypotf(oklab.b, oklab.a); + let hue = eatan2f(oklab.b, oklab.a); + Oklch::new(oklab.l, chroma, hue) + } + + /// Converts *Oklch* to *Oklab* + #[inline] + pub fn to_oklab(&self) -> Oklab { + let l = self.l; + let a = self.c * self.h.ecos(); + let b = self.c * self.h.esin(); + Oklab::new(l, a, b) + } +} diff --git a/src/rgb.rs b/src/rgb.rs index f1bcd2c..96679ca 100644 --- a/src/rgb.rs +++ b/src/rgb.rs @@ -10,6 +10,7 @@ use crate::lab::Lab; use crate::luv::Luv; use crate::{Hsl, Jzazbz, LCh, Oklab, Sigmoidal, TransferFunction, Xyz}; use erydanos::Euclidean3DDistance; +use crate::oklch::Oklch; #[derive(Debug, PartialOrd, PartialEq, Clone, Copy)] /// Represents any RGB values, Rgb, Rgb etc. @@ -107,6 +108,15 @@ impl Rgb { Oklab::from_rgb(*self, transfer_function) } + /// Converts rgb to *Oklch* + /// + /// # Arguments + /// `transfer_function` - Transfer function to convert into linear colorspace and backwards + #[inline] + pub fn to_oklch(&self, transfer_function: TransferFunction) -> Oklch { + Oklch::from_rgb(*self, transfer_function) + } + /// Converts rgb to S-shaped sigmoidized components #[inline] pub fn to_sigmoidal(&self) -> Sigmoidal { diff --git a/src/sse/image_to_oklab.rs b/src/sse/image_to_oklab.rs index 5427643..da85989 100644 --- a/src/sse/image_to_oklab.rs +++ b/src/sse/image_to_oklab.rs @@ -5,6 +5,7 @@ * // license that can be found in the LICENSE file. */ use crate::image::ImageConfiguration; +use crate::image_to_oklab::OklabTarget; use crate::sse::{ _mm_color_matrix_ps, get_sse_linear_transfer, sse_deinterleave_rgb, sse_deinterleave_rgba, sse_interleave_ps_rgb, sse_interleave_ps_rgba, @@ -13,14 +14,14 @@ use crate::{ load_u8_and_deinterleave, store_and_interleave_v3_f32, store_and_interleave_v4_f32, TransferFunction, SRGB_TO_XYZ_D65, }; -use erydanos::_mm_cbrt_fast_ps; +use erydanos::{_mm_atan2_ps, _mm_cbrt_fast_ps, _mm_hypot_fast_ps}; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; macro_rules! triple_to_oklab { - ($r: expr, $g: expr, $b: expr, $transfer: expr, + ($r: expr, $g: expr, $b: expr, $transfer: expr, $target: expr, $x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr, $c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr, $m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr @@ -44,14 +45,22 @@ macro_rules! triple_to_oklab { let m_ = _mm_cbrt_fast_ps(l_m); let s_ = _mm_cbrt_fast_ps(l_s); - let (l, m, s) = + let (l, mut a, mut b) = _mm_color_matrix_ps(l_, m_, s_, $m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, $m8); - (l, m, s) + + if $target == OklabTarget::OKLCH { + let c = _mm_hypot_fast_ps(a, b); + let h = _mm_atan2_ps(a, b); + a = c; + b = h; + } + + (l, a, b) }}; } #[inline(always)] -pub unsafe fn sse_image_to_oklab( +pub unsafe fn sse_image_to_oklab( start_cx: usize, src: *const u8, src_offset: usize, @@ -60,6 +69,7 @@ pub unsafe fn sse_image_to_oklab( dst_offset: usize, transfer_function: TransferFunction, ) -> usize { + let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let channels = image_configuration.get_channels_count(); let mut cx = start_cx; @@ -119,8 +129,8 @@ pub unsafe fn sse_image_to_oklab( let b_low_low = _mm_cvtepu16_epi32(b_low); let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!( - r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1, - c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 + r_low_low, g_low_low, b_low_low, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, x8, + c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_low = _mm_cvtepu8_epi16(a_chan); @@ -141,8 +151,8 @@ pub unsafe fn sse_image_to_oklab( let b_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); let (x_low_high, y_low_high, z_low_high) = triple_to_oklab!( - r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, - c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 + r_low_high, g_low_high, b_low_high, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, + x8, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); if image_configuration.has_alpha() { @@ -167,8 +177,8 @@ pub unsafe fn sse_image_to_oklab( let b_high_low = _mm_cvtepu16_epi32(b_high); let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!( - r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, - c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 + r_high_low, g_high_low, b_high_low, &transfer, target, x0, x1, x2, x3, x4, x5, x6, x7, + x8, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8 ); let a_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(a_chan)); @@ -191,6 +201,7 @@ pub unsafe fn sse_image_to_oklab( g_high_high, b_high_high, &transfer, + target, x0, x1, x2, diff --git a/src/sse/oklab_to_image.rs b/src/sse/oklab_to_image.rs index 080ec9e..080514d 100644 --- a/src/sse/oklab_to_image.rs +++ b/src/sse/oklab_to_image.rs @@ -5,6 +5,7 @@ * // license that can be found in the LICENSE file. */ use crate::image::ImageConfiguration; +use crate::image_to_oklab::OklabTarget; use crate::sse::{ _mm_color_matrix_ps, _mm_cube_ps, get_sse_gamma_transfer, sse_deinterleave_rgb_ps, sse_deinterleave_rgba_ps, sse_interleave_rgb, sse_interleave_rgba, @@ -13,6 +14,7 @@ use crate::{ load_f32_and_deinterleave, store_and_interleave_v3_u8, store_and_interleave_v4_u8, TransferFunction, XYZ_TO_SRGB_D65, }; +use erydanos::{_mm_cos_ps, _mm_sin_ps}; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] @@ -22,6 +24,7 @@ use std::arch::x86_64::*; unsafe fn sse_oklab_vld( src: *const f32, transfer_function: TransferFunction, + oklab_target: OklabTarget, m0: __m128, m1: __m128, m2: __m128, @@ -54,11 +57,17 @@ unsafe fn sse_oklab_vld( let v_scale_alpha = _mm_set1_ps(255f32); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) = - load_f32_and_deinterleave!(src, image_configuration); + let (l, mut a, mut b, mut a_f32) = load_f32_and_deinterleave!(src, image_configuration); + + if oklab_target == OklabTarget::OKLCH { + let a0 = _mm_mul_ps(a, _mm_cos_ps(b)); + let b0 = _mm_mul_ps(a, _mm_sin_ps(b)); + a = a0; + b = b0; + } let (mut l_l, mut l_m, mut l_s) = - _mm_color_matrix_ps(r_f32, g_f32, b_f32, m0, m1, m2, m3, m4, m5, m6, m7, m8); + _mm_color_matrix_ps(l, a, b, m0, m1, m2, m3, m4, m5, m6, m7, m8); l_l = _mm_cube_ps(l_l); l_m = _mm_cube_ps(l_m); @@ -68,9 +77,9 @@ unsafe fn sse_oklab_vld( let (r_l, g_l, b_l) = _mm_color_matrix_ps(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); - r_f32 = transfer(r_l); - g_f32 = transfer(g_l); - b_f32 = transfer(b_l); + let mut r_f32 = transfer(r_l); + let mut g_f32 = transfer(g_l); + let mut b_f32 = transfer(b_l); r_f32 = _mm_mul_ps(r_f32, v_scale_alpha); g_f32 = _mm_mul_ps(g_f32, v_scale_alpha); @@ -98,7 +107,7 @@ unsafe fn sse_oklab_vld( } #[inline(always)] -pub unsafe fn sse_oklab_to_image( +pub unsafe fn sse_oklab_to_image( start_cx: usize, src: *const f32, src_offset: u32, @@ -107,6 +116,7 @@ pub unsafe fn sse_oklab_to_image( width: u32, transfer_function: TransferFunction, ) -> usize { + let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let channels = image_configuration.get_channels_count(); let mut cx = start_cx; @@ -157,6 +167,7 @@ pub unsafe fn sse_oklab_to_image( let (r_row0_, g_row0_, b_row0_, a_row0_) = sse_oklab_vld::( src_ptr_0, transfer_function, + target, m0, m1, m2, @@ -191,6 +202,7 @@ pub unsafe fn sse_oklab_to_image( let (r_row1_, g_row1_, b_row1_, a_row1_) = sse_oklab_vld::( src_ptr_1, transfer_function, + target, m0, m1, m2, @@ -225,6 +237,7 @@ pub unsafe fn sse_oklab_to_image( let (r_row2_, g_row2_, b_row2_, a_row2_) = sse_oklab_vld::( src_ptr_2, transfer_function, + target, m0, m1, m2, @@ -259,6 +272,7 @@ pub unsafe fn sse_oklab_to_image( let (r_row3_, g_row3_, b_row3_, a_row3_) = sse_oklab_vld::( src_ptr_3, transfer_function, + target, m0, m1, m2,