Skip to content

Commit

Permalink
Refactor, Added SSE for RGBA -> XYZa
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 6, 2024
1 parent 74ef388 commit 02390c6
Show file tree
Hide file tree
Showing 23 changed files with 665 additions and 387 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: actions-rust-lang/setup-rust-toolchain@v1
- run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu
- run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu i686-unknown-linux-gnu
- run: cargo build --target aarch64-unknown-linux-gnu
- run: cargo build --target x86_64-unknown-linux-gnu
- run: cargo build --target i686-unknown-linux-gnu
- name: Test release pipeline
run: cargo publish --dry-run
3 changes: 0 additions & 3 deletions src/avx2_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ use crate::image_to_xyz_lab::XyzTarget;
#[allow(unused_imports)]
use crate::neon_gamma_curves::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[allow(unused_imports)]
use crate::sse_gamma_curves::{sse_rec709_to_linear, sse_srgb_to_linear};
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::x86_64_simd_support::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
Expand Down
1 change: 0 additions & 1 deletion src/avx_math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ pub unsafe fn _mm256_prefer_fma_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
return _mm256_fmadd_ps(b, c, a);
}


#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
#[allow(dead_code)]
Expand Down
4 changes: 2 additions & 2 deletions src/concat_alpha.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::x86_64_simd_support::{avx2_deinterleave_rgb_ps, avx2_interleave_rgba_ps};
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::x86_64_simd_support::{sse_deinterleave_rgb_ps, sse_interleave_ps_rgba};
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
Expand All @@ -13,6 +11,8 @@ use std::arch::x86::*;
use std::arch::x86_64::*;
#[allow(unused_imports)]
use std::slice;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::*;

/// Appends an alpha plane to an existing RGB/XYZ/LAB (or any other 3-plane) image. The image becomes RGBA, XYZa, LABa, etc.
pub fn append_alpha(
Expand Down
13 changes: 8 additions & 5 deletions src/image_to_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::image::ImageConfiguration;
target_feature = "neon"
))]
use crate::neon_to_linear::neon_channels_to_linear;
#[cfg(target_arch = "x86_64")]
use crate::sse_to_linear::sse_channels_to_linear;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::*;
use crate::Rgb;

#[inline(always)]
Expand All @@ -35,18 +35,21 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(

let channels = image_configuration.get_channels_count();

#[cfg(target_arch = "x86_64")]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut _has_sse = false;

#[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}

for _ in 0..height as usize {
let mut cx = 0usize;

#[cfg(target_arch = "x86_64")]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
if _has_sse {
cx = sse_channels_to_linear::<CHANNELS_CONFIGURATION, USE_ALPHA>(
Expand Down
8 changes: 4 additions & 4 deletions src/image_to_linear_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ use crate::neon_to_linear::get_neon_linear_transfer;
))]
use crate::neon_to_linear_u8::neon_image_linear_to_u8;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_image_to_linear_u8::sse_image_to_linear_unsigned;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_to_linear::get_sse_linear_transfer;
use crate::sse::get_sse_linear_transfer;
use crate::Rgb;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_image_to_linear_unsigned::sse_channels_to_linear_u8;

#[inline]
fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down Expand Up @@ -60,7 +60,7 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
unsafe {
if _has_sse {
let transfer = get_sse_linear_transfer(transfer_function);
cx = sse_image_to_linear_unsigned::sse_channels_to_linear::<
cx = sse_channels_to_linear_u8::<
CHANNELS_CONFIGURATION,
USE_ALPHA,
>(
Expand Down
4 changes: 2 additions & 2 deletions src/image_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use crate::image_to_xyz_lab::XyzTarget::{LAB, LUV, XYZ};
target_feature = "neon"
))]
use crate::neon_to_xyz_lab::neon_channels_to_xyz_or_lab;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_to_xyz_lab::sse_channels_to_xyz_or_lab;
use crate::{Rgb, Xyz, SRGB_TO_XYZ_D65};
use std::slice;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_channels_to_xyz_or_lab;

pub(crate) enum XyzTarget {
LAB = 0,
Expand Down
54 changes: 35 additions & 19 deletions src/image_xyza_laba.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use crate::image::ImageConfiguration;
use crate::image_to_xyz_lab::XyzTarget;
use crate::image_to_xyz_lab::XyzTarget::{LAB, LUV, XYZ};
use crate::{Rgb, TransferFunction, Xyz, SRGB_TO_XYZ_D65};
use std::slice;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
use crate::neon_to_xyza_laba::neon_channels_to_xyza_or_laba;
use crate::{Rgb, TransferFunction, Xyz, SRGB_TO_XYZ_D65};
use std::slice;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_channels_to_xyza_laba;

#[inline(always)]
fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
Expand All @@ -29,6 +31,17 @@ fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8
let mut src_offset = 0usize;
let mut dst_offset = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut _has_sse = false;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
#[cfg(target_feature = "sse4.1")]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}
}

const CHANNELS: usize = 4;

let channels = image_configuration.get_channels_count();
Expand All @@ -37,6 +50,22 @@ fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8
#[allow(unused_mut)]
let mut cx = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
if _has_sse {
cx = sse_channels_to_xyza_laba::<CHANNELS_CONFIGURATION, TARGET>(
cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
&matrix,
transfer_function,
);
}
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
Expand Down Expand Up @@ -134,10 +163,7 @@ pub fn rgba_to_lab_with_alpha(
width: u32,
height: u32,
) {
channels_to_xyz_with_alpha::<
{ ImageConfiguration::Rgba as u8 },
{ LAB as u8 },
>(
channels_to_xyz_with_alpha::<{ ImageConfiguration::Rgba as u8 }, { LAB as u8 }>(
src,
src_stride,
dst,
Expand Down Expand Up @@ -168,10 +194,7 @@ pub fn bgra_to_lab_with_alpha(
width: u32,
height: u32,
) {
channels_to_xyz_with_alpha::<
{ ImageConfiguration::Bgra as u8 },
{ LAB as u8 },
>(
channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { LAB as u8 }>(
src,
src_stride,
dst,
Expand All @@ -183,7 +206,6 @@ pub fn bgra_to_lab_with_alpha(
);
}


/// This function converts RGBA to CIE L*uv against the D65 white point, preserving and normalizing the alpha channel and keeping it in the last position. This is much more effective than a naive direct transformation.
///
/// # Arguments
Expand All @@ -203,10 +225,7 @@ pub fn rgba_to_luv_with_alpha(
width: u32,
height: u32,
) {
channels_to_xyz_with_alpha::<
{ ImageConfiguration::Rgba as u8 },
{ LUV as u8 },
>(
channels_to_xyz_with_alpha::<{ ImageConfiguration::Rgba as u8 }, { LUV as u8 }>(
src,
src_stride,
dst,
Expand Down Expand Up @@ -237,10 +256,7 @@ pub fn bgra_to_luv_with_alpha(
width: u32,
height: u32,
) {
channels_to_xyz_with_alpha::<
{ ImageConfiguration::Bgra as u8 },
{ LUV as u8 },
>(
channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { LUV as u8 }>(
src,
src_stride,
dst,
Expand Down
9 changes: 2 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,14 @@ mod rgba;
mod neon_math;
mod neon_gamma_curves;
mod xyz_transform;
mod image_to_xyz_lab;
mod image;
mod neon_to_xyz_lab;
mod xyz_lab_to_image;
mod sse_math;
mod sse_gamma_curves;
mod sse_to_xyz_lab;
mod x86_64_simd_support;
mod neon_to_linear;
mod sse_to_linear;
mod image_to_linear;
mod linear_to_image;
mod neon_linear_to_image;
mod sse_linear_to_image;
mod neon_xyz_lab_to_image;
mod concat_alpha;
mod avx_math;
Expand All @@ -36,8 +30,9 @@ mod xyza_laba_to_image;
mod neon_xyza_laba_to_image;
mod image_to_linear_u8;
mod neon_to_linear_u8;
mod sse_image_to_linear_u8;
mod linear_to_image_u8;
mod sse;
mod image_to_xyz_lab;

pub use gamma_curves::*;
pub use hsl::Hsl;
Expand Down
4 changes: 2 additions & 2 deletions src/linear_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use crate::image::ImageConfiguration;
target_feature = "neon"
))]
use crate::neon_linear_to_image::neon_linear_to_gamma;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_linear_to_image::sse_linear_to_gamma;
use crate::Rgb;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_linear_to_gamma;

#[inline(always)]
fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down
8 changes: 4 additions & 4 deletions src/linear_to_image_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ use crate::neon_linear_to_image::get_neon_gamma_transfer;
))]
use crate::neon_to_linear_u8::neon_image_linear_to_u8;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_image_to_linear_u8::sse_image_to_linear_unsigned;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_linear_to_image::get_sse_gamma_transfer;
use crate::sse::get_sse_gamma_transfer;
use crate::Rgb;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_image_to_linear_unsigned::sse_channels_to_linear_u8;

#[inline]
fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down Expand Up @@ -60,7 +60,7 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
unsafe {
if _has_sse {
let transfer = get_sse_gamma_transfer(transfer_function);
cx = sse_image_to_linear_unsigned::sse_channels_to_linear::<
cx = sse_channels_to_linear_u8::<
CHANNELS_CONFIGURATION,
USE_ALPHA,
>(
Expand Down
2 changes: 2 additions & 0 deletions src/rgb_expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::arch::aarch64::*;
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::*;

/// Expands RGB to RGBA.
pub fn rgb_to_rgba(
Expand Down
18 changes: 18 additions & 0 deletions src/sse/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
mod sse_image_to_linear_u8;
mod sse_linear_to_image;
mod sse_math;
mod sse_to_linear;
mod sse_to_xyz_lab;
mod sse_gamma_curves;
mod sse_support;
mod sse_from_xyz_lab;
mod sse_to_xyza_laba;

pub use sse_image_to_linear_u8::*;

Check failure on line 11 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_image_to_linear_u8::*`
pub use sse_linear_to_image::*;

Check failure on line 12 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_linear_to_image::*`
pub use sse_math::*;

Check failure on line 13 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_math::*`
pub use sse_to_xyz_lab::*;

Check failure on line 14 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_to_xyz_lab::*`
pub use sse_gamma_curves::*;

Check failure on line 15 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_gamma_curves::*`
pub use sse_support::*;

Check failure on line 16 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_support::*`
pub use sse_to_linear::*;

Check failure on line 17 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_to_linear::*`
pub use sse_to_xyza_laba::*;

Check failure on line 18 in src/sse/mod.rs

View workflow job for this annotation

GitHub Actions / Build

unused import: `sse_to_xyza_laba::*`
Empty file added src/sse/sse_from_xyz_lab.rs
Empty file.
12 changes: 11 additions & 1 deletion src/sse_gamma_curves.rs → src/sse/sse_gamma_curves.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[allow(unused_imports)]
use crate::gamma_curves::TransferFunction;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse_math::*;
use crate::sse::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[cfg(target_arch = "x86")]
Expand Down Expand Up @@ -86,3 +86,13 @@ pub unsafe fn sse_rec709_to_linear(linear: __m128) -> __m128 {
low = _mm_mul_ps(low, _mm_set1_ps(1f32 / 4.5f32));
return _mm_select_ps(mask, high, low);
}

/// Selects the SSE "to linear" routine that corresponds to `transfer_function`.
///
/// The result is a plain function pointer, so callers can resolve the transfer
/// function once and then invoke the chosen routine repeatedly without
/// re-matching on the enum.
///
/// # Arguments
///
/// * `transfer_function` - Transfer function whose to-linear routine is wanted.
///
/// # Returns
///
/// `sse_srgb_to_linear` for [`TransferFunction::Srgb`] and
/// `sse_rec709_to_linear` for [`TransferFunction::Rec709`].
///
/// # Safety
///
/// This function only picks a pointer and performs no unsafe operation itself;
/// it is declared `unsafe` to match the routines it hands out. The returned
/// pointer is an `unsafe fn` and must be called under whatever contract the
/// selected routine has.
/// NOTE(review): presumably the routines require the CPU features the SSE path
/// is gated on by the callers — confirm against their definitions.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub unsafe fn get_sse_linear_transfer(
    transfer_function: TransferFunction,
) -> unsafe fn(__m128) -> __m128 {
    // The explicit pointer type coerces both distinct fn items to one type.
    let to_linear: unsafe fn(__m128) -> __m128 = match transfer_function {
        TransferFunction::Rec709 => sse_rec709_to_linear,
        TransferFunction::Srgb => sse_srgb_to_linear,
    };
    to_linear
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,12 @@ pub mod sse_image_to_linear_unsigned {
use crate::image_to_xyz_lab::XyzTarget;
#[allow(unused_imports)]
use crate::neon_gamma_curves::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[allow(unused_imports)]
use crate::sse_gamma_curves::{sse_rec709_to_linear, sse_srgb_to_linear};
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::x86_64_simd_support::*;
use crate::sse::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
unsafe fn sse_triple_to_linear_u8(
r: __m128i,
Expand All @@ -41,9 +36,8 @@ pub mod sse_image_to_linear_unsigned {
)
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
pub(crate) unsafe fn sse_channels_to_linear<
pub(crate) unsafe fn sse_channels_to_linear_u8<
const CHANNELS_CONFIGURATION: u8,
const USE_ALPHA: bool,
>(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[allow(unused_imports)]
use crate::image::ImageConfiguration;
#[allow(unused_imports)]
use crate::sse_gamma_curves::*;
use crate::sse::*;
#[allow(unused_imports)]
use crate::x86_64_simd_support::*;
#[allow(unused_imports)]
Expand Down
File renamed without changes.
Loading

0 comments on commit 02390c6

Please sign in to comment.