Skip to content

Commit

Permalink
SSE planar linearization
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jul 11, 2024
1 parent 02464c3 commit d37a630
Show file tree
Hide file tree
Showing 15 changed files with 339 additions and 512 deletions.
82 changes: 41 additions & 41 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
-version = "0.4.12"
+version = "0.4.13"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand All @@ -16,7 +16,7 @@ repository = "https://github.com/awxkee/colorutils-rs"
exclude = ["*.jpg"]

[dependencies]
-erydanos = "0.1.0"
+erydanos = "0.2.3"
half = "2.4.1"

[features]
Expand Down
4 changes: 2 additions & 2 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn main() {
lab_store.resize(width as usize * components * height as usize, 0f32);
let src_stride = width * components as u32;
let start_time = Instant::now();
-rgb_to_lab(
+rgb_to_lch(
src_bytes,
src_stride,
&mut lab_store,
Expand Down Expand Up @@ -92,7 +92,7 @@ fn main() {
// }

let start_time = Instant::now();
-lab_to_srgb(
+lch_to_rgb(
&lab_store,
store_stride as u32,
&mut dst_slice,
Expand Down
37 changes: 37 additions & 0 deletions src/linear_to_planar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
target_feature = "neon"
))]
use crate::neon::linear_to_planar::neon_linear_plane_to_gamma;
#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
use crate::sse::sse_linear_plane_to_gamma;
use crate::TransferFunction;

#[inline(always)]
Expand All @@ -20,6 +25,20 @@ fn linear_to_gamma_channels(

let transfer = transfer_function.get_gamma_function();

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
let mut _has_sse = false;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}

for _ in 0..height as usize {
let mut _cx = 0usize;

Expand All @@ -39,6 +58,24 @@ fn linear_to_gamma_channels(
);
}

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
unsafe {
if _has_sse {
_cx = sse_linear_plane_to_gamma(
_cx,
src.as_ptr(),
src_offset as u32,
dst.as_mut_ptr(),
dst_offset as u32,
width,
transfer_function,
);
}
}

let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 };
let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };

Expand Down
6 changes: 1 addition & 5 deletions src/neon/cie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@ use crate::luv::{
LUV_WHITE_V_PRIME,
};
use crate::neon::math::{prefer_vfmaq_f32, vcolorq_matrix_f32, vcubeq_f32};
-use erydanos::neon::atan2f::vatan2q_f32;
-use erydanos::neon::cbrtf::vcbrtq_f32;
-use erydanos::neon::cosf::vcosq_f32;
-use erydanos::neon::hypotf::vhypotq_fast_f32;
-use erydanos::neon::sinf::vsinq_f32;
 use std::arch::aarch64::*;
+use erydanos::{vatan2q_f32, vcbrtq_f32, vcosq_f32, vhypotq_fast_f32, vsinq_f32};

#[inline(always)]
pub(crate) unsafe fn neon_triple_to_xyz(
Expand Down
3 changes: 1 addition & 2 deletions src/neon/math.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::arch::aarch64::*;

-use erydanos::neon::powf::vpowq_fast_f32;
+use erydanos::vpowq_fast_f32;

#[inline(always)]
#[allow(dead_code)]
Expand Down
Loading

0 comments on commit d37a630

Please sign in to comment.