Skip to content

Commit

Permalink
Added l-alpha-beta, avx rework, SSE improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Aug 1, 2024
1 parent df60780 commit 304088b
Show file tree
Hide file tree
Showing 49 changed files with 3,223 additions and 1,188 deletions.
5 changes: 4 additions & 1 deletion .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[target.x86_64-pc-windows-msvc]
rustflags = ["-Ctarget-cpu=native"]
rustflags = ["-Ctarget-cpu=native"]

[target.x86_64-apple-darwin]
rustflags = ["-Ctarget-feature=+sse4.1,+avx2"]
68 changes: 49 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.5.5"
version = "0.5.6"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Rust utilities for color handling and conversion.

## The goal is to provide high-performance support for common color conversions, with SIMD fast paths for the most common conversion paths

The available SIMD fast paths are generally 5-10 times faster than naive implementations.

Allows conversion between

- [x] Rgb/Rgba/Rgba1010102/Rgb565/RgbF16
Expand All @@ -14,6 +18,7 @@ Allows conversion between
- [x] Oklch
- [x] Jzazbz
- [x] Jzczhz
- [x] lαβ (l-alpha-beta)

### Performance

Expand Down
12 changes: 6 additions & 6 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ fn main() {
let g = 127;
let b = 255;
let rgb = Rgb::<u8>::new(r, g, b);
let lalphabeta = LAlphaBeta::from_rgb(rgb, TransferFunction::Srgb);
println!("LAlphaBeta {:?}", lalphabeta);
println!("Rgb {:?}", rgb);
let restored = lalphabeta.to_rgb(TransferFunction::Srgb);
println!("Restored RGB {:?}", restored);
let xyb = Oklab::from_rgb(rgb, TransferFunction::Srgb);
println!("XYB {:?}", xyb);
println!("Rgb {:?}", xyb.to_rgb(TransferFunction::Srgb));
// let restored = lalphabeta.to_rgb(TransferFunction::Srgb);
// println!("Restored RGB {:?}", restored);

let img = ImageReader::open("./assets/beach_horizon.jpg")
let img = ImageReader::open("./assets/asset.jpg")
.unwrap()
.decode()
.unwrap();
Expand Down
10 changes: 7 additions & 3 deletions src/avx/cie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
* // license that can be found in the LICENSE file.
*/

use crate::avx::_mm256_cube_ps;
use crate::avx::math::*;
use crate::avx::{_mm256_cube_ps, _mm256_prefer_fma_ps, _mm256_select_ps};
use crate::luv::{
LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y, LUV_MULTIPLIER_INVERSE_Y, LUV_WHITE_U_PRIME,
LUV_WHITE_V_PRIME,
};
use erydanos::{
_mm256_atan2_ps, _mm256_cbrt_ps, _mm256_cos_ps, _mm256_hypot_ps, _mm256_prefer_fma_ps,
_mm256_select_ps, _mm256_sin_ps,
};
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -147,8 +151,8 @@ pub(crate) unsafe fn avx2_triple_to_luv(
);
let u_prime = _mm256_div_ps(_mm256_mul_ps(x, _mm256_set1_ps(4f32)), den);
let v_prime = _mm256_div_ps(_mm256_mul_ps(y, _mm256_set1_ps(9f32)), den);
let sub_u_prime = _mm256_sub_ps(u_prime, _mm256_set1_ps(crate::luv::LUV_WHITE_U_PRIME));
let sub_v_prime = _mm256_sub_ps(v_prime, _mm256_set1_ps(crate::luv::LUV_WHITE_V_PRIME));
let sub_u_prime = _mm256_sub_ps(u_prime, _mm256_set1_ps(LUV_WHITE_U_PRIME));
let sub_v_prime = _mm256_sub_ps(v_prime, _mm256_set1_ps(LUV_WHITE_V_PRIME));
let l13 = _mm256_mul_ps(l, _mm256_set1_ps(13f32));
let u = _mm256_select_ps(nan_mask, zeros, _mm256_mul_ps(l13, sub_u_prime));
let v = _mm256_select_ps(nan_mask, zeros, _mm256_mul_ps(l13, sub_v_prime));
Expand Down
35 changes: 3 additions & 32 deletions src/avx/from_sigmoidal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ use crate::avx::{
avx2_deinterleave_rgb_ps, avx2_deinterleave_rgba_ps, avx2_interleave_rgb,
avx2_interleave_rgba_epi8, avx2_pack_s32, avx2_pack_u16,
};
use crate::avx_store_and_interleave_u8;
use crate::image::ImageConfiguration;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

use crate::image::ImageConfiguration;

#[inline(always)]
unsafe fn vld_sigmoidal<const CHANNELS_CONFIGURATION: u8>(
src: *const f32,
Expand Down Expand Up @@ -94,36 +94,7 @@ pub unsafe fn avx_from_sigmoidal_row<const CHANNELS_CONFIGURATION: u8>(

let dst_ptr = dst.add(cx * channels);

match image_configuration {
ImageConfiguration::Rgb => {
let (rgb0, rgb1, rgb2) = avx2_interleave_rgb(r_row, g_row, b_row);
_mm256_storeu_si256(dst_ptr as *mut __m256i, rgb0);
_mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, rgb1);
_mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, rgb2);
}
ImageConfiguration::Rgba => {
let (rgba0, rgba1, rgba2, rgba3) =
avx2_interleave_rgba_epi8(r_row, g_row, b_row, a_row);
_mm256_storeu_si256(dst_ptr as *mut __m256i, rgba0);
_mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, rgba1);
_mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, rgba2);
_mm256_storeu_si256(dst_ptr.add(96) as *mut __m256i, rgba3);
}
ImageConfiguration::Bgra => {
let (bgra0, bgra1, bgra2, bgra3) =
avx2_interleave_rgba_epi8(b_row, g_row, r_row, a_row);
_mm256_storeu_si256(dst_ptr as *mut __m256i, bgra0);
_mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, bgra1);
_mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, bgra2);
_mm256_storeu_si256(dst_ptr.add(96) as *mut __m256i, bgra3);
}
ImageConfiguration::Bgr => {
let (bgr0, bgr1, bgr2) = avx2_interleave_rgb(b_row, g_row, r_row);
_mm256_storeu_si256(dst_ptr as *mut __m256i, bgr0);
_mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, bgr1);
_mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, bgr2);
}
}
avx_store_and_interleave_u8!(dst_ptr, image_configuration, r_row, g_row, b_row, a_row);
cx += 32;
}

Expand Down
5 changes: 3 additions & 2 deletions src/avx/gamma_curves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use crate::avx::math::*;
#[allow(unused_imports)]
use crate::gamma_curves::TransferFunction;
use erydanos::_mm256_select_ps;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -100,8 +101,8 @@ pub unsafe fn avx2_pure_gamma(x: __m256, value: f32) -> __m256 {
let zero_mask = _mm256_cmp_ps::<_CMP_LE_OS>(x, zeros);
let ones_mask = _mm256_cmp_ps::<_CMP_GE_OS>(x, ones);
let mut rs = _mm256_pow_n_ps(x, value);
rs = crate::avx::math::_mm256_select_ps(zero_mask, zeros, rs);
crate::avx::math::_mm256_select_ps(ones_mask, ones, rs)
rs = _mm256_select_ps(zero_mask, zeros, rs);
_mm256_select_ps(ones_mask, ones, rs)
}

#[inline(always)]
Expand Down
Loading

0 comments on commit 304088b

Please sign in to comment.