Skip to content

Commit

Permalink
Finalize HSV conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 8, 2024
1 parent 226dc54 commit 4ceb35b
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 37 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.2.11"
version = "0.3.0"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
55 changes: 28 additions & 27 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::arch::aarch64::{vdupq_n_f32, vdupq_n_u32, vgetq_lane_f32, vgetq_lane_u32};
use colorutils_rs::*;
use image::io::Reader as ImageReader;
use image::{EncodableLayout, GenericImageView};
Expand Down Expand Up @@ -43,54 +44,54 @@ fn main() {
println!("Back RGB {:?}", hsl.to_rgb8());

// unsafe {
// let (h, s, l) = sse_rgb_to_hsl(
// _mm_set1_epi32(r as i32),
// _mm_set1_epi32(g as i32),
// _mm_set1_epi32(b as i32),
// _mm_set1_ps(1f32),
// let (h, s, l) = neon_rgb_to_hsl(
// vdupq_n_u32(r as u32),
// vdupq_n_u32(g as u32),
// vdupq_n_u32(b as u32),
// vdupq_n_f32(1f32),
// );
// println!(
// "NEON HSL {}, {}, {}",
// f32::from_bits(_mm_extract_ps::<0>(h) as u32),
// f32::from_bits(_mm_extract_ps::<0>(s) as u32),
// f32::from_bits(_mm_extract_ps::<0>(l) as u32)
// vgetq_lane_f32::<0>(h),
// vgetq_lane_f32::<0>(s),
// vgetq_lane_f32::<0>(l)
// );
// let (r1, g1, b1) = sse_hsl_to_rgb(h, s, l, _mm_set1_ps(1f32));
// let (r1, g1, b1) = neon_hsl_to_rgb(h, s, l, vdupq_n_f32(1f32));
//
// println!(
// "NEON HSL -> RGB {}, {}, {}",
// _mm_extract_epi32::<0>(r1),
// _mm_extract_epi32::<0>(g1),
// _mm_extract_epi32::<0>(b1)
// vgetq_lane_u32::<0>(r1),
// vgetq_lane_u32::<0>(g1),
// vgetq_lane_u32::<0>(b1)
// );
// }
//
// unsafe {
// let (h, s, v) = sse_rgb_to_hsv(
// _mm_set1_epi32(r as i32),
// _mm_set1_epi32(g as i32),
// _mm_set1_epi32(b as i32),
// _mm_set1_ps(1f32),
// let (h, s, v) = neon_rgb_to_hsv(
// vdupq_n_u32(r as u32),
// vdupq_n_u32(g as u32),
// vdupq_n_u32(b as u32),
// vdupq_n_f32(1f32),
// );
// let hsv = rgb.to_hsv();
// println!("HSV {:?}", hsv);
// println!("HSV->RGB {:?}", hsv.to_rgb8());
// println!(
// "NEON HSV {}, {}, {}",
// f32::from_bits(_mm_extract_ps::<0>(h) as u32),
// f32::from_bits(_mm_extract_ps::<0>(s) as u32),
// f32::from_bits(_mm_extract_ps::<0>(v) as u32)
// vgetq_lane_f32::<0>(h),
// vgetq_lane_f32::<0>(s),
// vgetq_lane_f32::<0>(v)
// );
// let (r1, g1, b1) = sse_hsv_to_rgb(h, s, v, _mm_set1_ps(1f32));
// let (r1, g1, b1) = neon_hsv_to_rgb(h, s, v, vdupq_n_f32(1f32));
// println!(
// "NEON RGB {}, {}, {}",
// _mm_extract_epi32::<0>(r1),
// _mm_extract_epi32::<0>(g1),
// _mm_extract_epi32::<0>(b1)
// vgetq_lane_u32::<0>(r1),
// vgetq_lane_u32::<0>(g1),
// vgetq_lane_u32::<0>(b1)
// );
// }

let img = ImageReader::open("./assets/asset.jpg")
let img = ImageReader::open("./assets/asset_middle.jpg")
.unwrap()
.decode()
.unwrap();
Expand Down Expand Up @@ -125,7 +126,7 @@ fn main() {
lab_store.resize(width as usize * components * height as usize, 0u16);
let src_stride = width * components as u32;
let start_time = Instant::now();
rgb_to_hsl(
rgb_to_hsv(
src_bytes,
src_stride,
&mut lab_store,
Expand Down Expand Up @@ -160,7 +161,7 @@ fn main() {
// }

let start_time = Instant::now();
hsl_to_rgb(
hsv_to_rgb(
&lab_store,
store_stride as u32,
&mut dst_slice,
Expand Down
4 changes: 2 additions & 2 deletions src/hsv_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ fn hsv_u16_to_channels<
target_feature = "neon"
))]
unsafe {
cx = neon_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
cx,
_cx = neon_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
_cx,
src.as_ptr(),
src_offset,
width,
Expand Down
104 changes: 101 additions & 3 deletions src/neon/neon_hsv_to_image.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::arch::aarch64::*;

use crate::{neon_hsl_to_rgb, neon_hsv_to_rgb};
use crate::image::ImageConfiguration;
use crate::image_to_hsv_support::HsvTarget;
use crate::neon::{neon_hsl_to_rgb, neon_hsv_to_rgb};

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
Expand Down Expand Up @@ -32,11 +32,109 @@ pub unsafe fn neon_hsv_u16_to_image<
}

let channels = image_configuration.get_channels_count();

let v_scale = vdupq_n_f32(scale);

let dst_ptr = dst.add(dst_offset);

while cx + 16 < width as usize {
let (h_chan, s_chan, v_chan, a_chan_lo);
let src_ptr = ((src as *const u8).add(src_offset) as *const u16).add(cx * channels);

match image_configuration {
ImageConfiguration::Rgb | ImageConfiguration::Bgr => {
let hsv_pixel = vld3q_u16(src_ptr);
h_chan = hsv_pixel.0;
s_chan = hsv_pixel.1;
v_chan = hsv_pixel.2;
a_chan_lo = vdupq_n_u16(255);
}
ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
let hsv_pixel = vld4q_u16(src_ptr);
h_chan = hsv_pixel.0;
s_chan = hsv_pixel.1;
v_chan = hsv_pixel.2;
a_chan_lo = hsv_pixel.3;
}
}

let h_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(h_chan)));
let s_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(s_chan)));
let v_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_chan)));

let (r_low, g_low, b_low) = match target {
HsvTarget::HSV => neon_hsv_to_rgb(h_low, s_low, v_low, v_scale),
HsvTarget::HSL => neon_hsl_to_rgb(h_low, s_low, v_low, v_scale),
};

let h_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(h_chan)));
let s_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(s_chan)));
let v_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_chan)));

let (r_high, g_high, b_high) = match target {
HsvTarget::HSV => neon_hsv_to_rgb(h_high, s_high, v_high, v_scale),
HsvTarget::HSL => neon_hsl_to_rgb(h_high, s_high, v_high, v_scale),
};

let r_chan_16_lo = vcombine_u16(vmovn_u32(r_low), vmovn_u32(r_high));
let g_chan_16_lo = vcombine_u16(vmovn_u32(g_low), vmovn_u32(g_high));
let b_chan_16_lo = vcombine_u16(vmovn_u32(b_low), vmovn_u32(b_high));

let src_ptr = src_ptr.add(8 * channels);
let (h_chan, s_chan, v_chan, a_chan_hi);
match image_configuration {
ImageConfiguration::Rgb | ImageConfiguration::Bgr => {
let hsv_pixel = vld3q_u16(src_ptr);
h_chan = hsv_pixel.0;
s_chan = hsv_pixel.1;
v_chan = hsv_pixel.2;
a_chan_hi = vdupq_n_u16(255);
}
ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
let hsv_pixel = vld4q_u16(src_ptr);
h_chan = hsv_pixel.0;
s_chan = hsv_pixel.1;
v_chan = hsv_pixel.2;
a_chan_hi = hsv_pixel.3;
}
}

let h_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(h_chan)));
let s_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(s_chan)));
let v_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_chan)));

let (r_low, g_low, b_low) = match target {
HsvTarget::HSV => neon_hsv_to_rgb(h_low, s_low, v_low, v_scale),
HsvTarget::HSL => neon_hsl_to_rgb(h_low, s_low, v_low, v_scale),
};

let h_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(h_chan)));
let s_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(s_chan)));
let v_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_chan)));

let (r_high, g_high, b_high) = match target {
HsvTarget::HSV => neon_hsv_to_rgb(h_high, s_high, v_high, v_scale),
HsvTarget::HSL => neon_hsl_to_rgb(h_high, s_high, v_high, v_scale),
};

let r_chan_16_hi = vcombine_u16(vmovn_u32(r_low), vmovn_u32(r_high));
let g_chan_16_hi = vcombine_u16(vmovn_u32(g_low), vmovn_u32(g_high));
let b_chan_16_hi = vcombine_u16(vmovn_u32(b_low), vmovn_u32(b_high));

let r_chan = vcombine_u8(vqmovn_u16(r_chan_16_lo), vqmovn_u16(r_chan_16_hi));
let g_chan = vcombine_u8(vqmovn_u16(g_chan_16_lo), vqmovn_u16(g_chan_16_hi));
let b_chan = vcombine_u8(vqmovn_u16(b_chan_16_lo), vqmovn_u16(b_chan_16_hi));

if USE_ALPHA {
let a_chan = vcombine_u8(vqmovn_u16(a_chan_lo), vqmovn_u16(a_chan_hi));
let pixel_set = uint8x16x4_t(r_chan, g_chan, b_chan, a_chan);
vst4q_u8(dst_ptr.add(cx * channels), pixel_set);
} else {
let pixel_set = uint8x16x3_t(r_chan, g_chan, b_chan);
vst3q_u8(dst_ptr.add(cx * channels), pixel_set);
}

cx += 16;
}

while cx + 8 < width as usize {
let (h_chan, s_chan, v_chan, a_chan);
let src_ptr = ((src as *const u8).add(src_offset) as *const u16).add(cx * channels);
Expand Down
2 changes: 1 addition & 1 deletion src/neon/neon_image_to_hsv.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::image::ImageConfiguration;
use crate::image_to_hsv_support::HsvTarget;
use crate::{neon_rgb_to_hsl, neon_rgb_to_hsv};
use crate::neon::{neon_rgb_to_hsl, neon_rgb_to_hsv};
use std::arch::aarch64::*;

#[cfg(all(
Expand Down
9 changes: 7 additions & 2 deletions src/neon/neon_math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@ use std::arch::aarch64::*;
#[inline(always)]
#[allow(dead_code)]
pub(crate) unsafe fn vfmodq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
let scale = vrndq_f32(vmulq_f32(a, vrecpeq_f32(b)));
prefer_vfmaq_f32(a, vnegq_f32(scale), b)
let dividend_vec = a;
let divisor_vec = b;
let division = vmulq_f32(dividend_vec, vrecpeq_f32(divisor_vec));
let int_part = vcvtq_f32_s32(vcvtq_s32_f32(division));
let product = vmulq_f32(int_part, divisor_vec);
let remainder = vsubq_f32(dividend_vec, product);
remainder
}

#[cfg(all(
Expand Down

0 comments on commit 4ceb35b

Please sign in to comment.