Skip to content

Commit

Permalink
Added Jzazbz and Jzczhz, refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
awxkee committed Jul 22, 2024
1 parent 3d37974 commit 38b8048
Show file tree
Hide file tree
Showing 55 changed files with 963 additions and 1,628 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.5.0"
version = "0.5.1"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
22 changes: 14 additions & 8 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@ pub const fn shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
}

fn main() {
let r = 126;
let g = 126;
let b = 126;
let r = 0;
let g = 127;
let b = 255;
let rgb = Rgb::<u8>::new(r, g, b);
let jzazbz = Jzczhz::from_rgb(rgb, TransferFunction::Srgb);
println!("Jzczhz {:?}", jzazbz);
println!("Rgb {:?}", rgb);
let restored = jzazbz.to_rgb(TransferFunction::Srgb);
println!("Restored RGB {:?}", restored);
// let jzazbz = Jzazbz::from_rgb(rgb, TransferFunction::Srgb);
// println!("Jzczhz {:?}", jzazbz);
// println!("Rgb {:?}", rgb);
// let restored = jzazbz.to_rgb(TransferFunction::Srgb);
// println!("Restored RGB {:?}", restored);
println!(
"Restored RGB {:?}",
Jzazbz::new(0.1f32, 0.0, -0.2f32).to_rgb(TransferFunction::Srgb)
);

let img = ImageReader::open("./assets/beach_horizon.jpg")
.unwrap()
Expand Down Expand Up @@ -73,6 +77,7 @@ fn main() {
store_stride as u32,
width,
height,
200f32,
TransferFunction::Srgb,
);
let elapsed_time = start_time.elapsed();
Expand Down Expand Up @@ -108,6 +113,7 @@ fn main() {
src_stride,
width,
height,
200f32,
TransferFunction::Srgb,
);

Expand Down
18 changes: 9 additions & 9 deletions src/avx/to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ pub unsafe fn avx2_image_to_xyz_lab<

let transfer = get_avx2_linear_transfer(transfer_function);

let cq1 = _mm256_set1_ps(matrix[0][0]);
let cq2 = _mm256_set1_ps(matrix[0][1]);
let cq3 = _mm256_set1_ps(matrix[0][2]);
let cq4 = _mm256_set1_ps(matrix[1][0]);
let cq5 = _mm256_set1_ps(matrix[1][1]);
let cq6 = _mm256_set1_ps(matrix[1][2]);
let cq7 = _mm256_set1_ps(matrix[2][0]);
let cq8 = _mm256_set1_ps(matrix[2][1]);
let cq9 = _mm256_set1_ps(matrix[2][2]);
let cq1 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(0));
let cq2 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(1));
let cq3 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(2));
let cq4 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(0));
let cq5 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(1));
let cq6 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(2));
let cq7 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(0));
let cq8 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(1));
let cq9 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(2));

let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32;

Expand Down
18 changes: 9 additions & 9 deletions src/avx/xyz_lab_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ pub unsafe fn avx_xyz_to_channels<

let mut cx = start_cx;

let c1 = _mm256_set1_ps(matrix[0][0]);
let c2 = _mm256_set1_ps(matrix[0][1]);
let c3 = _mm256_set1_ps(matrix[0][2]);
let c4 = _mm256_set1_ps(matrix[1][0]);
let c5 = _mm256_set1_ps(matrix[1][1]);
let c6 = _mm256_set1_ps(matrix[1][2]);
let c7 = _mm256_set1_ps(matrix[2][0]);
let c8 = _mm256_set1_ps(matrix[2][1]);
let c9 = _mm256_set1_ps(matrix[2][2]);
let c1 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(0));
let c2 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(1));
let c3 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(2));
let c4 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(0));
let c5 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(1));
let c6 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(2));
let c7 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(0));
let c8 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(1));
let c9 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(2));

const CHANNELS: usize = 3usize;

Expand Down
18 changes: 9 additions & 9 deletions src/avx/xyza_laba_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ pub unsafe fn avx_xyza_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET:

let mut cx = start_cx;

let c1 = _mm256_set1_ps(matrix[0][0]);
let c2 = _mm256_set1_ps(matrix[0][1]);
let c3 = _mm256_set1_ps(matrix[0][2]);
let c4 = _mm256_set1_ps(matrix[1][0]);
let c5 = _mm256_set1_ps(matrix[1][1]);
let c6 = _mm256_set1_ps(matrix[1][2]);
let c7 = _mm256_set1_ps(matrix[2][0]);
let c8 = _mm256_set1_ps(matrix[2][1]);
let c9 = _mm256_set1_ps(matrix[2][2]);
let c1 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(0));
let c2 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(1));
let c3 = _mm256_set1_ps(*matrix.get_unchecked(0).get_unchecked(2));
let c4 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(0));
let c5 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(1));
let c6 = _mm256_set1_ps(*matrix.get_unchecked(1).get_unchecked(2));
let c7 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(0));
let c8 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(1));
let c9 = _mm256_set1_ps(*matrix.get_unchecked(2).get_unchecked(2));

const CHANNELS: usize = 4usize;

Expand Down
46 changes: 16 additions & 30 deletions src/hsv_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,25 @@ fn hsv_u16_to_channels<
}
}

let mut _wide_row_handler: Option<
unsafe fn(usize, *const u16, usize, u32, *mut u8, usize, f32) -> usize,
> = None;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
let mut _has_sse = false;
if is_x86_feature_detected!("sse4.1") {
_wide_row_handler = Some(sse_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>);
}

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
{
_wide_row_handler =
Some(neon_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>);
}

let mut src_offset = 0usize;
Expand All @@ -65,43 +72,22 @@ fn hsv_u16_to_channels<
let scale = 1f32 / scale;

for _ in 0..height as usize {
#[allow(unused_mut)]
let mut _cx = 0usize;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
unsafe {
if _has_sse {
_cx = sse_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
if let Some(dispatcher) = _wide_row_handler {
unsafe {
_cx = dispatcher(
_cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
scale,
)
);
}
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
unsafe {
_cx = neon_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
_cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
scale,
)
}

let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const u16 };
let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };

Expand Down
49 changes: 18 additions & 31 deletions src/image_to_hsv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,26 @@ fn channels_to_hsv_u16<
}
}

let mut _wide_row_handler: Option<
unsafe fn(usize, *const u8, usize, u32, *mut u16, usize, f32) -> usize,
> = None;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
let mut _has_sse = false;
{
_wide_row_handler =
Some(neon_channels_to_hsv_u16::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>);
}

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
_wide_row_handler =
Some(sse_channels_to_hsv_u16::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>);
}

let mut src_offset = 0usize;
Expand All @@ -61,17 +69,12 @@ fn channels_to_hsv_u16<
let channels = image_configuration.get_channels_count();

for _ in 0..height as usize {
#[allow(unused_mut)]
let mut cx = 0usize;
let mut _cx = 0usize;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
unsafe {
if _has_sse {
cx = sse_channels_to_hsv_u16::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
cx,
if let Some(dispatcher) = _wide_row_handler {
unsafe {
_cx = dispatcher(
_cx,
src.as_ptr(),
src_offset,
width,
Expand All @@ -82,26 +85,10 @@ fn channels_to_hsv_u16<
}
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
unsafe {
cx = neon_channels_to_hsv_u16::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
scale,
)
}

let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut u16 };

for x in cx..width as usize {
for x in _cx..width as usize {
let px = x * channels;
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
Expand Down
Loading

0 comments on commit 38b8048

Please sign in to comment.