Skip to content

Commit

Permalink
LUV bugfixes, improve LCh
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 5, 2024
1 parent 41d2886 commit 69af0da
Show file tree
Hide file tree
Showing 20 changed files with 714 additions and 111 deletions.
105 changes: 36 additions & 69 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ fn main() {
// _mm_storeu_ps(dst.as_mut_ptr() as *mut f32, ln);
// println!("{:?}", dst);
// }
// #[cfg(target_arch = "aarch64")]
// unsafe {
// let m = vdupq_n_f32(std::f32::consts::E);
// let cbrt = vlogq_f32_ulp35(m);
// let l = vgetq_lane_f32::<0>(cbrt);
// println!("Exp {}", l);
// }
#[cfg(target_arch = "aarch64")]
unsafe {
let m = vdupq_n_f32(27f32);
let cbrt = vcbrtq_f32_ulp2(m);
let l = vgetq_lane_f32::<0>(cbrt);
println!("Cbrt {}", l);
}

let img = ImageReader::open("./assets/asset_middle.jpg")
.unwrap()
Expand Down Expand Up @@ -68,9 +68,7 @@ fn main() {
let mut lab_store: Vec<f32> = vec![];
let store_stride = width as usize * 4usize * std::mem::size_of::<f32>();
lab_store.resize(width as usize * 4usize * height as usize, 0f32);
let mut alpha_store: Vec<f32> = vec![];
let alpha_stride = width as usize * std::mem::size_of::<f32>();
alpha_store.resize(width as usize * height as usize, 0f32);
let start_time = Instant::now();
rgba_to_lab_with_alpha(
src_bytes,
4u32 * width,
Expand Down Expand Up @@ -110,6 +108,10 @@ fn main() {
height,
);

let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("Fast image resize: {:.2?}", elapsed_time);

// laba_to_srgb(
// &lab_store,
// lab_stride as u32,
Expand All @@ -124,51 +126,16 @@ fn main() {
src_bytes = &dst_slice;
}

let mut xyz: Vec<f32> = vec![];
xyz.resize(4 * width as usize * height as usize, 0f32);

let mut a_plane: Vec<f32> = vec![];
a_plane.resize(width as usize * height as usize, 0f32);

for i in 0..1 {
let start_time = Instant::now();
// srgba_to_xyza(
// src_bytes,
// width * components,
// &mut xyz,
// width * 3 * std::mem::size_of::<f32>() as u32,
// &mut a_plane,
// width as u32 * std::mem::size_of::<f32>() as u32,
// width,
// height,
// );
// rgba_to_linear(
// src_bytes,
// width * components,
// &mut xyz,
// width * 3 * std::mem::size_of::<f32>() as u32,
// width,
// height,
// TransferFunction::Srgb,
// );
rgba_to_linear(
src_bytes,
width * components,
&mut xyz,
width * 4 * std::mem::size_of::<f32>() as u32,
width,
height,
TransferFunction::Srgb,
);
let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("sRGB to XYZ: {:.2?}", elapsed_time);
}

let mut dst_bytes: Vec<u8> = vec![];
dst_bytes.resize(width as usize * components as usize * height as usize, 0u8);

let start_time = Instant::now();
// let mut xyz: Vec<f32> = vec![];
// xyz.resize(4 * width as usize * height as usize, 0f32);
//
// let mut a_plane: Vec<f32> = vec![];
// a_plane.resize(width as usize * height as usize, 0f32);
//
// let mut dst_bytes: Vec<u8> = vec![];
// dst_bytes.resize(width as usize * components as usize * height as usize, 0u8);
//
// let start_time = Instant::now();
// xyz_to_srgb(
// &xyz,
// width * 3 * std::mem::size_of::<f32>() as u32,
Expand All @@ -177,16 +144,16 @@ fn main() {
// width,
// height,
// );

linear_to_rgba(
&xyz,
width * 4 * std::mem::size_of::<f32>() as u32,
&mut dst_bytes,
width * components,
width,
height,
TransferFunction::Srgb,
);
//
// linear_to_rgba(
// &xyz,
// width * 4 * std::mem::size_of::<f32>() as u32,
// &mut dst_bytes,
// width * components,
// width,
// height,
// TransferFunction::Srgb,
// );

// linear_to_rgb(
// &xyz,
Expand All @@ -198,16 +165,16 @@ fn main() {
// TransferFunction::Srgb,
// );

let elapsed_time = start_time.elapsed();
// let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("XYZ to sRGB: {:.2?}", elapsed_time);
// println!("XYZ to sRGB: {:.2?}", elapsed_time);

// let rgba = rgb_to_rgba(&dst_bytes, width, height);

if components == 4 {
image::save_buffer(
"converted.png",
dst_bytes.as_bytes(),
src_bytes.as_bytes(),
dimensions.0,
dimensions.1,
image::ExtendedColorType::Rgba8,
Expand All @@ -216,7 +183,7 @@ fn main() {
} else {
image::save_buffer(
"converted.jpg",
dst_bytes.as_bytes(),
src_bytes.as_bytes(),
dimensions.0,
dimensions.1,
image::ExtendedColorType::Rgb8,
Expand Down
56 changes: 56 additions & 0 deletions src/avx2_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::x86_64_simd_support::*;
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::luv::{LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y};

Check failure on line 24 in src/avx2_to_xyz_lab.rs

View workflow job for this annotation

GitHub Actions / Build

unused imports: `LUV_CUTOFF_FORWARD_Y`, `LUV_MULTIPLIER_FORWARD_Y`

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
Expand Down Expand Up @@ -66,6 +67,37 @@ unsafe fn avx2_triple_to_xyz(
(x, y, z)
}

/// Converts eight packed XYZ triples into CIE L\*u\*v\* triples.
///
/// Each argument holds one channel (X, Y or Z) for eight pixels; the result is
/// the `(L, u, v)` channels for the same eight lanes.
///
/// Lanes whose chromaticity denominator `X + 15·Y + 3·Z` is exactly zero
/// (e.g. pure black) would otherwise produce `0 / 0 = NaN` for `u'`/`v'`;
/// for those lanes `u` and `v` are forced to zero.
///
/// NOTE(review): the comments below assume `_mm256_prefer_fma_ps(a, b, c)`
/// evaluates `a + b * c` and `_mm256_select_ps(mask, t, f)` picks `t` where the
/// mask is set — confirm against the helper definitions.
///
/// # Safety
///
/// The caller must guarantee that the running CPU supports the instruction
/// sets used by the intrinsics and project helpers invoked here (AVX at
/// minimum).
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
pub(crate) unsafe fn avx2_triple_to_luv(
    x: __m256,
    y: __m256,
    z: __m256,
) -> (__m256, __m256, __m256) {
    let zeros = _mm256_setzero_ps();
    // den = x + 3z + 15y, the shared denominator of u' and v'.
    let den = _mm256_prefer_fma_ps(
        _mm256_prefer_fma_ps(x, z, _mm256_set1_ps(3f32)),
        y,
        _mm256_set1_ps(15f32),
    );
    // The division below is singular exactly when the denominator is zero.
    // The previous `den < 0` test never matched that case, so black pixels
    // leaked NaN into u and v. A quiet (non-signalling) equality compare is
    // used so NaN inputs do not raise a floating-point exception here.
    let nan_mask = _mm256_cmp_ps::<_CMP_EQ_OQ>(den, zeros);
    // Below the cutoff L is linear in Y; above it the cube-root branch applies.
    let l_low_mask = _mm256_cmp_ps::<_CMP_LT_OS>(y, _mm256_set1_ps(LUV_CUTOFF_FORWARD_Y));
    let y_cbrt = _mm256_cbrt_ps(y);
    let l = _mm256_select_ps(
        l_low_mask,
        _mm256_mul_ps(y, _mm256_set1_ps(LUV_MULTIPLIER_FORWARD_Y)),
        // 116 * cbrt(y) - 16
        _mm256_prefer_fma_ps(_mm256_set1_ps(-16f32), y_cbrt, _mm256_set1_ps(116f32)),
    );
    // u' = 4x / den, v' = 9y / den
    let u_prime = _mm256_div_ps(_mm256_mul_ps(x, _mm256_set1_ps(4f32)), den);
    let v_prime = _mm256_div_ps(_mm256_mul_ps(y, _mm256_set1_ps(9f32)), den);
    // Offsets relative to the reference white chromaticity.
    let sub_u_prime = _mm256_sub_ps(u_prime, _mm256_set1_ps(crate::luv::LUV_WHITE_U_PRIME));
    let sub_v_prime = _mm256_sub_ps(v_prime, _mm256_set1_ps(crate::luv::LUV_WHITE_V_PRIME));
    // u = 13 * L * (u' - u'_n), v = 13 * L * (v' - v'_n); zeroed where den == 0.
    let l13 = _mm256_mul_ps(l, _mm256_set1_ps(13f32));
    let u = _mm256_select_ps(nan_mask, zeros, _mm256_mul_ps(l13, sub_u_prime));
    let v = _mm256_select_ps(nan_mask, zeros, _mm256_mul_ps(l13, sub_v_prime));
    (l, u, v)
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
#[allow(dead_code)]
Expand Down Expand Up @@ -191,6 +223,12 @@ pub(crate) unsafe fn avx2_channels_to_xyz_or_lab<
z_low_low = b;
}
XyzTarget::XYZ => {}
XyzTarget::LUV => {
let (l, u, v) = avx2_triple_to_luv(x_low_low, y_low_low, z_low_low);
x_low_low = l;
y_low_low = u;
z_low_low = v;
}
}

let write_dst_ptr = dst_ptr.add(cx * 3);
Expand Down Expand Up @@ -218,6 +256,12 @@ pub(crate) unsafe fn avx2_channels_to_xyz_or_lab<
z_low_high = b;
}
XyzTarget::XYZ => {}
XyzTarget::LUV => {
let (l, u, v) = avx2_triple_to_luv(x_low_high, y_low_high, z_low_high);
x_low_high = l;
y_low_high = u;
z_low_high = v;
}
}

let (v0, v1, v2) = avx2_interleave_rgb_ps(x_low_high, y_low_high, z_low_high);
Expand Down Expand Up @@ -246,6 +290,12 @@ pub(crate) unsafe fn avx2_channels_to_xyz_or_lab<
z_high_low = b;
}
XyzTarget::XYZ => {}
XyzTarget::LUV => {
let (l, u, v) = avx2_triple_to_luv(x_high_low, y_high_low, z_high_low);
x_high_low = l;
y_high_low = u;
z_high_low = v;
}
}

let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_low, y_high_low, z_high_low);
Expand Down Expand Up @@ -281,6 +331,12 @@ pub(crate) unsafe fn avx2_channels_to_xyz_or_lab<
z_high_high = b;
}
XyzTarget::XYZ => {}
XyzTarget::LUV => {
let (l, u, v) = avx2_triple_to_luv(x_high_high, y_high_high, z_high_high);
x_high_high = l;
y_high_high = u;
z_high_high = v;
}
}

let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_high, y_high_high, z_high_high);
Expand Down
1 change: 1 addition & 0 deletions src/concat_alpha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub fn append_alpha(

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
#[cfg(target_feature = "sse4.1")]
if is_x86_feature_detected!("sse4.1") {
_use_sse = true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/image_to_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
#[cfg(target_arch = "x86_64")]
let mut has_sse = false;

#[cfg(target_arch = "x86_64")]
#[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
if is_x86_feature_detected!("sse4.1") {
has_sse = true;
}
Expand Down
5 changes: 4 additions & 1 deletion src/image_to_linear_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut _has_sse = false;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}
Expand Down
Loading

0 comments on commit 69af0da

Please sign in to comment.