Skip to content

Commit

Permalink
Fixing Oklab
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jul 20, 2024
1 parent c7e2bf9 commit f3ec079
Show file tree
Hide file tree
Showing 12 changed files with 297 additions and 90 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.4.15"
version = "0.4.16"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Allows conversion between
- [x] HSV
- [x] LAB
- [x] LUV
- [x] LCh
- [x] XYZ
- [x] Sigmoidal
- [x] Oklab
Expand Down
44 changes: 35 additions & 9 deletions src/neon/image_to_oklab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
use crate::image::ImageConfiguration;
use crate::neon::get_neon_linear_transfer;
use crate::neon::math::vcolorq_matrix_f32;
use crate::TransferFunction;
use crate::{TransferFunction, SRGB_TO_XYZ_D65};
use erydanos::vcbrtq_fast_f32;
use std::arch::aarch64::*;

macro_rules! triple_to_oklab {
($r: expr, $g: expr, $b: expr, $transfer: expr,
$x0: expr, $x1: expr, $x2: expr, $x3: expr, $x4: expr, $x5: expr, $x6: expr, $x7: expr, $x8: expr,
$c0:expr, $c1:expr, $c2: expr, $c3: expr, $c4:expr, $c5: expr, $c6:expr, $c7: expr, $c8: expr,
$m0: expr, $m1: expr, $m2: expr, $m3: expr, $m4: expr, $m5: expr, $m6: expr, $m7: expr, $m8: expr
) => {{
Expand All @@ -23,10 +24,13 @@ macro_rules! triple_to_oklab {
let dl_m = $transfer(g_f);
let dl_s = $transfer(b_f);

let (l_l, l_m, l_s) = vcolorq_matrix_f32(
dl_l, dl_m, dl_s, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8,
let (x, y, z) = vcolorq_matrix_f32(
dl_l, dl_m, dl_s, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8,
);

let (l_l, l_m, l_s) =
vcolorq_matrix_f32(x, y, z, $c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7, $c8);

let l_ = vcbrtq_fast_f32(l_l);
let m_ = vcbrtq_fast_f32(l_m);
let s_ = vcbrtq_fast_f32(l_s);
Expand Down Expand Up @@ -54,6 +58,19 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(

let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32;

// Matrix To XYZ
let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = (
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(2)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(0)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(1)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(1).get_unchecked(2)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(0)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(1)),
vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(2)),
);

let (c0, c1, c2, c3, c4, c5, c6, c7, c8) = (
vdupq_n_f32(0.4122214708f32),
vdupq_n_f32(0.5363325363f32),
Expand Down Expand Up @@ -120,8 +137,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
let b_low_low = vmovl_u16(vget_low_u16(b_low));

let (x_low_low, y_low_low, z_low_low) = triple_to_oklab!(
r_low_low, g_low_low, b_low_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0, m1,
m2, m3, m4, m5, m6, m7, m8
r_low_low, g_low_low, b_low_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0, c1,
c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
);

let a_low = vmovl_u8(vget_low_u8(a_chan));
Expand All @@ -141,8 +158,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
let b_low_high = vmovl_high_u16(b_low);

let (x_low_high, y_low_high, z_low_high) = triple_to_oklab!(
r_low_high, g_low_high, b_low_high, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0,
m1, m2, m3, m4, m5, m6, m7, m8
r_low_high, g_low_high, b_low_high, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0,
c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
);

if image_configuration.has_alpha() {
Expand All @@ -163,8 +180,8 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
let b_high_low = vmovl_u16(vget_low_u16(b_high));

let (x_high_low, y_high_low, z_high_low) = triple_to_oklab!(
r_high_low, g_high_low, b_high_low, &transfer, c0, c1, c2, c3, c4, c5, c6, c7, c8, m0,
m1, m2, m3, m4, m5, m6, m7, m8
r_high_low, g_high_low, b_high_low, &transfer, x0, x1, x2, x3, x4, x5, x6, x7, x8, c0,
c1, c2, c3, c4, c5, c6, c7, c8, m0, m1, m2, m3, m4, m5, m6, m7, m8
);

let a_high = vmovl_high_u8(a_chan);
Expand All @@ -191,6 +208,15 @@ pub unsafe fn neon_image_to_oklab<const CHANNELS_CONFIGURATION: u8>(
g_high_high,
b_high_high,
&transfer,
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
c0,
c1,
c2,
Expand Down
34 changes: 3 additions & 31 deletions src/neon/linear_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,19 @@

use crate::image::ImageConfiguration;
use crate::neon::*;
use crate::TransferFunction;
use crate::{load_f32_and_deinterleave, TransferFunction};
use std::arch::aarch64::*;

#[inline(always)]
unsafe fn neon_gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
src: *const f32,
transfer_function: TransferFunction,
) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) {
let d_alpha = vdupq_n_f32(1f32);
let transfer = get_neon_gamma_transfer(transfer_function);
let v_scale_alpha = vdupq_n_f32(255f32);
let (mut r_f32, mut g_f32, mut b_f32, mut a_f32);
let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
match image_configuration {
ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
let rgba_pixels = vld4q_f32(src);
if image_configuration == ImageConfiguration::Rgba {
r_f32 = rgba_pixels.0;
g_f32 = rgba_pixels.1;
b_f32 = rgba_pixels.2;
} else {
r_f32 = rgba_pixels.2;
g_f32 = rgba_pixels.1;
b_f32 = rgba_pixels.0;
}
a_f32 = rgba_pixels.3;
}
ImageConfiguration::Bgr | ImageConfiguration::Rgb => {
let rgb_pixels = vld3q_f32(src);
if image_configuration == ImageConfiguration::Rgb {
r_f32 = rgb_pixels.0;
g_f32 = rgb_pixels.1;
b_f32 = rgb_pixels.2;
} else {
r_f32 = rgb_pixels.2;
g_f32 = rgb_pixels.1;
b_f32 = rgb_pixels.0;
}
a_f32 = d_alpha;
}
}
let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) =
load_f32_and_deinterleave!(src, image_configuration);

r_f32 = transfer(r_f32);
g_f32 = transfer(g_f32);
Expand Down
1 change: 1 addition & 0 deletions src/neon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub mod linear_to_planar;
mod math;
mod oklab_to_image;
pub mod planar_to_linear;
mod routines;
mod sigmoidal;
mod to_linear;
mod to_linear_u8;
Expand Down
97 changes: 64 additions & 33 deletions src/neon/oklab_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use crate::image::ImageConfiguration;
use crate::neon::get_neon_gamma_transfer;
use crate::neon::math::vcolorq_matrix_f32;
use crate::TransferFunction;
use crate::{load_f32_and_deinterleave, TransferFunction, XYZ_TO_SRGB_D65};
use std::arch::aarch64::*;

#[inline(always)]
Expand All @@ -32,49 +32,31 @@ unsafe fn neon_oklab_gamma_vld<const CHANNELS_CONFIGURATION: u8>(
c6: float32x4_t,
c7: float32x4_t,
c8: float32x4_t,
x0: float32x4_t,
x1: float32x4_t,
x2: float32x4_t,
x3: float32x4_t,
x4: float32x4_t,
x5: float32x4_t,
x6: float32x4_t,
x7: float32x4_t,
x8: float32x4_t,
) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) {
let d_alpha = vdupq_n_f32(1f32);
let transfer = get_neon_gamma_transfer(transfer_function);
let v_scale_alpha = vdupq_n_f32(255f32);
let (mut r_f32, mut g_f32, mut b_f32, mut a_f32);
let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
match image_configuration {
ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
let rgba_pixels = vld4q_f32(src);
if image_configuration == ImageConfiguration::Rgba {
r_f32 = rgba_pixels.0;
g_f32 = rgba_pixels.1;
b_f32 = rgba_pixels.2;
} else {
r_f32 = rgba_pixels.2;
g_f32 = rgba_pixels.1;
b_f32 = rgba_pixels.0;
}
a_f32 = rgba_pixels.3;
}
ImageConfiguration::Bgr | ImageConfiguration::Rgb => {
let rgb_pixels = vld3q_f32(src);
if image_configuration == ImageConfiguration::Rgb {
r_f32 = rgb_pixels.0;
g_f32 = rgb_pixels.1;
b_f32 = rgb_pixels.2;
} else {
r_f32 = rgb_pixels.2;
g_f32 = rgb_pixels.1;
b_f32 = rgb_pixels.0;
}
a_f32 = d_alpha;
}
}

let (mut r_f32, mut g_f32, mut b_f32, mut a_f32) =
load_f32_and_deinterleave!(src, image_configuration);
let (mut l_l, mut l_m, mut l_s) =
vcolorq_matrix_f32(r_f32, g_f32, b_f32, m0, m1, m2, m3, m4, m5, m6, m7, m8);

l_l = vmulq_f32(vmulq_f32(l_l, l_l), l_l);
l_m = vmulq_f32(vmulq_f32(l_m, l_m), l_m);
l_s = vmulq_f32(vmulq_f32(l_s, l_s), l_s);

let (r_l, g_l, b_l) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8);
let (x, y, z) = vcolorq_matrix_f32(l_l, l_m, l_s, c0, c1, c2, c3, c4, c5, c6, c7, c8);

let (r_l, g_l, b_l) = vcolorq_matrix_f32(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8);

r_f32 = transfer(r_l);
g_f32 = transfer(g_l);
Expand Down Expand Up @@ -107,6 +89,19 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
let channels = image_configuration.get_channels_count();
let mut cx = start_cx;

// Matrix from XYZ
let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = (
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(0)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(1)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(0).get_unchecked(2)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(0)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(1)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(1).get_unchecked(2)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(0)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(1)),
vdupq_n_f32(*XYZ_TO_SRGB_D65.get_unchecked(2).get_unchecked(2)),
);

let (m0, m1, m2, m3, m4, m5, m6, m7, m8) = (
vdupq_n_f32(1f32),
vdupq_n_f32(0.3963377774f32),
Expand Down Expand Up @@ -158,6 +153,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
c6,
c7,
c8,
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
);

let src_ptr_1 = offset_src_ptr.add(4 * channels);
Expand All @@ -183,6 +187,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
c6,
c7,
c8,
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
);

let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels);
Expand All @@ -208,6 +221,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
c6,
c7,
c8,
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
);

let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels);
Expand All @@ -233,6 +255,15 @@ pub unsafe fn neon_oklab_to_image<const CHANNELS_CONFIGURATION: u8>(
c6,
c7,
c8,
x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
);

let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));
Expand Down
43 changes: 43 additions & 0 deletions src/neon/routines.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved.
* //
* // Use of this source code is governed by a BSD-style
* // license that can be found in the LICENSE file.
*/

/// Loads interleaved `f32` pixel data from `$ptr` and returns the channels as
/// a `(r, g, b, a)` tuple of vectors, regardless of the in-memory channel order.
///
/// * `Rgba` / `Bgra` are read with `vld4q_f32`; the fourth de-interleaved
///   vector is returned as alpha.
/// * `Rgb` / `Bgr` are read with `vld3q_f32`; alpha is filled with
///   `vdupq_n_f32(1f32)`.
/// * For the `Bgr*` layouts the first and third de-interleaved vectors are
///   swapped so the result is always in R, G, B order.
///
/// `$image_configuration` is evaluated exactly once, and only the load for the
/// selected layout is executed.
///
/// # Safety
///
/// The expansion calls the NEON intrinsics directly, so it must be used inside
/// an `unsafe` context. `ImageConfiguration`, `vld3q_f32`, `vld4q_f32` and
/// `vdupq_n_f32` are resolved at the call site and must be in scope there.
/// NOTE(review): `$ptr` presumably has to be valid for reading four whole
/// pixels of the selected layout — confirm against the callers.
#[macro_export]
macro_rules! load_f32_and_deinterleave {
    ($ptr: expr, $image_configuration: expr) => {{
        // One arm per layout: the configuration expression is evaluated a
        // single time and no `PartialEq` comparison is required.
        match $image_configuration {
            ImageConfiguration::Rgba => {
                let pixels = vld4q_f32($ptr);
                (pixels.0, pixels.1, pixels.2, pixels.3)
            }
            ImageConfiguration::Bgra => {
                let pixels = vld4q_f32($ptr);
                // Swap the first and third channel to hand back R, G, B order.
                (pixels.2, pixels.1, pixels.0, pixels.3)
            }
            ImageConfiguration::Rgb => {
                let pixels = vld3q_f32($ptr);
                // No alpha in memory: substitute a constant 1.0.
                (pixels.0, pixels.1, pixels.2, vdupq_n_f32(1f32))
            }
            ImageConfiguration::Bgr => {
                let pixels = vld3q_f32($ptr);
                (pixels.2, pixels.1, pixels.0, vdupq_n_f32(1f32))
            }
        }
    }};
}
Loading

0 comments on commit f3ec079

Please sign in to comment.