Skip to content

Commit

Permalink
Added AVX2, some rework and improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed May 28, 2024
1 parent c80063d commit 3fd2220
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.2.1"
version = "0.2.2"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion src/image_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ fn channels_to_xyz<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool, cons

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
#[cfg(feature = "avx2")]
#[cfg(target_feature = "avx2")]
if is_x86_feature_detected!("avx2") {
_has_avx2 = true;
}
Expand Down
6 changes: 3 additions & 3 deletions src/linear_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
let channels = image_configuration.get_channels_count();

#[cfg(target_arch = "x86_64")]
let mut has_sse = false;
let mut _has_sse = false;

#[cfg(target_arch = "x86_64")]
if is_x86_feature_detected!("sse4.1") {
has_sse = true;
_has_sse = true;
}

for _ in 0..height as usize {
let mut cx = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
if has_sse {
if _has_sse {
cx = sse_linear_to_gamma::<CHANNELS_CONFIGURATION, USE_ALPHA>(
cx,
src.as_ptr(),
Expand Down
15 changes: 8 additions & 7 deletions src/rgb_expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,19 @@ pub fn rgb_to_rgba(
let mut src_offset = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut use_sse = false;
let mut _use_sse = false;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut use_avx = false;
let mut _use_avx = false;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if is_x86_feature_detected!("sse4.1") {
use_sse = true;
_use_sse = true;
}
#[cfg(target_feature = "avx2")]
if is_x86_feature_detected!("avx2") {
use_avx = true;
_use_avx = true;
}
}

Expand All @@ -47,7 +48,7 @@ pub fn rgb_to_rgba(
unsafe {
let src_ptr = src.as_ptr().add(src_offset);
let dst_ptr = dst.as_mut_ptr().add(dst_offset);
if use_avx {
if _use_avx {
let v_alpha = _mm256_set1_epi8(default_alpha as i8);
while cx + 32 < width as usize {
let xyz_chan_ptr = src_ptr.add(cx * 3usize);
Expand All @@ -66,7 +67,7 @@ pub fn rgb_to_rgba(
cx += 32;
}
}
if use_sse {
if _use_sse {
let v_alpha = _mm_set1_epi8(default_alpha as i8);
while cx + 16 < width as usize {
let xyz_chan_ptr = src_ptr.add(cx * 3usize);
Expand Down Expand Up @@ -105,7 +106,7 @@ pub fn rgb_to_rgba(
dst[dst_offset + x * 4] = src[src_offset + x * 3];
dst[dst_offset + x * 4 + 1] = src[src_offset + x * 3 + 1];
dst[dst_offset + x * 4 + 2] = src[src_offset + x * 3 + 2];
dst[dst_offset + x * 4 + 3] = 255;
dst[dst_offset + x * 4 + 3] = default_alpha;
}

dst_offset += dst_stride as usize;
Expand Down
2 changes: 1 addition & 1 deletion src/sse_math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ pub(crate) unsafe fn _mm_neg_epi32(x: __m128i) -> __m128i {
#[inline(always)]
#[allow(dead_code)]
/// This is Cube Root using Pow functions,
/// it also precise however due to of inexact nature of power 1/3 result slightly differ
/// it is also precise; however, due to the inexact nature of the power 1/3, the result differs slightly
/// from the real cbrt by about ULP 3-4, but this is almost 2 times faster than a cbrt with real ULP 3.5
pub unsafe fn _mm_cbrt_ps(d: __m128) -> __m128 {
_mm_pow_n_ps(d, 1f32 / 3f32)
Expand Down

0 comments on commit 3fd2220

Please sign in to comment.