Skip to content

Commit

Permalink
SSE HSV, HSL added
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 8, 2024
1 parent 47590d9 commit 226dc54
Show file tree
Hide file tree
Showing 19 changed files with 1,009 additions and 80 deletions.
117 changes: 77 additions & 40 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use std::arch::aarch64::{vdupq_n_f32, vdupq_n_u32, vgetq_lane_f32, vgetq_lane_u32};
use colorutils_rs::*;
use image::io::Reader as ImageReader;
use image::{EncodableLayout, GenericImageView};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::time::Instant;

#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -36,31 +33,64 @@ fn main() {
// println!("Cbrt {}", l);
// }

let rgb = Rgb::<u8>::new(140, 164, 177);
let r = 140;
let g = 164;
let b = 177;
let rgb = Rgb::<u8>::new(r, g, b);
let hsl = rgb.to_hsl();
println!("RGB {:?}", rgb);
println!("HSL {:?}", hsl);
println!("Back RGB {:?}", hsl.to_rgb8());

// unsafe {
// let (h, s, l) = neon_rgb_to_hsl(vdupq_n_u32(255), vdupq_n_u32(156), vdupq_n_u32(255), vdupq_n_f32(1f32));
// println!("NEON HSL {}, {}, {}", vgetq_lane_f32::<0>(h), vgetq_lane_f32::<0>(s), vgetq_lane_f32::<0>(l));
// let (r1, g1, b1) = neon_hsl_to_rgb(h, s, l, vdupq_n_f32(1f32));
// unsafe {
// let (h, s, l) = sse_rgb_to_hsl(
// _mm_set1_epi32(r as i32),
// _mm_set1_epi32(g as i32),
// _mm_set1_epi32(b as i32),
// _mm_set1_ps(1f32),
// );
// println!(
// "NEON HSL {}, {}, {}",
// f32::from_bits(_mm_extract_ps::<0>(h) as u32),
// f32::from_bits(_mm_extract_ps::<0>(s) as u32),
// f32::from_bits(_mm_extract_ps::<0>(l) as u32)
// );
// let (r1, g1, b1) = sse_hsl_to_rgb(h, s, l, _mm_set1_ps(1f32));
//
// println!("NEON HSL -> RHB {}, {}, {}", vgetq_lane_u32::<0>(r1), vgetq_lane_u32::<0>(g1), vgetq_lane_u32::<0>(b1));
// println!(
// "NEON HSL -> RGB {}, {}, {}",
// _mm_extract_epi32::<0>(r1),
// _mm_extract_epi32::<0>(g1),
// _mm_extract_epi32::<0>(b1)
// );
// }
//
// unsafe {
// let (h, s, v) = neon_rgb_to_hsv(vdupq_n_u32(255), vdupq_n_u32(156), vdupq_n_u32(255), vdupq_n_f32(1f32));
// unsafe {
// let (h, s, v) = sse_rgb_to_hsv(
// _mm_set1_epi32(r as i32),
// _mm_set1_epi32(g as i32),
// _mm_set1_epi32(b as i32),
// _mm_set1_ps(1f32),
// );
// let hsv = rgb.to_hsv();
// println!("HSV {:?}", hsv);
// println!("NEON HSV {}, {}, {}", vgetq_lane_f32::<0>(h), vgetq_lane_f32::<0>(s), vgetq_lane_f32::<0>(v));
// let (r1, g1, b1) = neon_hsv_to_rgb(h, s,v, vdupq_n_f32(1f32));
// println!("NEON RGB {}, {}, {}", vgetq_lane_u32::<0>(r1), vgetq_lane_u32::<0>(g1), vgetq_lane_u32::<0>(b1));

// println!("HSV->RBB {:?}", hsv.to_rgb8());
// println!(
// "NEON HSV {}, {}, {}",
// f32::from_bits(_mm_extract_ps::<0>(h) as u32),
// f32::from_bits(_mm_extract_ps::<0>(s) as u32),
// f32::from_bits(_mm_extract_ps::<0>(v) as u32)
// );
// let (r1, g1, b1) = sse_hsv_to_rgb(h, s, v, _mm_set1_ps(1f32));
// println!(
// "NEON RGB {}, {}, {}",
// _mm_extract_epi32::<0>(r1),
// _mm_extract_epi32::<0>(g1),
// _mm_extract_epi32::<0>(b1)
// );
// }

let img = ImageReader::open("./assets/asset_middle.jpg")
let img = ImageReader::open("./assets/asset.jpg")
.unwrap()
.decode()
.unwrap();
Expand All @@ -71,37 +101,42 @@ fn main() {
let mut src_bytes = img.as_bytes();
let width = dimensions.0;
let height = dimensions.1;
let components = 4;

let mut dst_rgba = vec![];
dst_rgba.resize(4usize * width as usize * height as usize, 0u8);
rgb_to_rgba(
&src_bytes,
3u32 * width,
&mut dst_rgba,
4u32 * width,
width,
height,
255,
);
src_bytes = &dst_rgba;
let components = 3;

// let mut dst_rgba = vec![];
// dst_rgba.resize(4usize * width as usize * height as usize, 0u8);
// rgb_to_rgba(
// &src_bytes,
// 3u32 * width,
// &mut dst_rgba,
// 4u32 * width,
// width,
// height,
// 255,
// );
// src_bytes = &dst_rgba;

let mut dst_slice: Vec<u8> = Vec::new();
dst_slice.resize(width as usize * 4 * height as usize, 0u8);
dst_slice.resize(width as usize * components * height as usize, 0u8);

{
let mut lab_store: Vec<u16> = vec![];
let store_stride = width as usize * 4usize * std::mem::size_of::<u16>();
lab_store.resize(width as usize * 4usize * height as usize, 0u16);
let store_stride = width as usize * components * std::mem::size_of::<u16>();
lab_store.resize(width as usize * components * height as usize, 0u16);
let src_stride = width * components as u32;
let start_time = Instant::now();
rgba_to_hsl(
rgb_to_hsl(
src_bytes,
4u32 * width,
src_stride,
&mut lab_store,
store_stride as u32,
width,
height,100f32
height,
100f32,
);
let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("RGBA To HSV: {:.2?}", elapsed_time);
// let mut destination: Vec<f32> = vec![];
// destination.resize(width as usize * height as usize * 4, 0f32);
// let dst_stride = width * 4 * std::mem::size_of::<f32>() as u32;
Expand All @@ -124,18 +159,20 @@ fn main() {
// src_shift += src_stride as usize;
// }

hsl_to_rgba(
let start_time = Instant::now();
hsl_to_rgb(
&lab_store,
store_stride as u32,
&mut dst_slice,
4u32 * width,
src_stride,
width,
height,100f32,
height,
100f32,
);

let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("Fast image resize: {:.2?}", elapsed_time);
println!("HSV To RGBA: {:.2?}", elapsed_time);

// laba_to_srgb(
// &lab_store,
Expand Down
2 changes: 0 additions & 2 deletions src/avx/avx2_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ use crate::gamma_curves::TransferFunction;
use crate::image::ImageConfiguration;
#[allow(unused_imports)]
use crate::image_to_xyz_lab::XyzTarget;
#[allow(unused_imports)]
use crate::neon_gamma_curves::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
Expand Down
38 changes: 35 additions & 3 deletions src/hsv_to_image.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use std::slice;

use crate::{Hsl, Hsv};
use crate::image::ImageConfiguration;
use crate::image_to_hsv_support::HsvTarget;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
use crate::neon::neon_hsv_u16_to_image;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_hsv_u16_to_image;
use crate::{Hsl, Hsv};

#[inline(always)]
fn hsv_u16_to_channels<
Expand All @@ -27,6 +33,17 @@ fn hsv_u16_to_channels<
}
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut _has_sse = false;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}

let mut src_offset = 0usize;
let mut dst_offset = 0usize;

Expand All @@ -36,7 +53,22 @@ fn hsv_u16_to_channels<

for _ in 0..height as usize {
#[allow(unused_mut)]
let mut cx = 0usize;
let mut _cx = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
if _has_sse {
_cx = sse_hsv_u16_to_image::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
_cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
scale,
)
}
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
Expand All @@ -60,7 +92,7 @@ fn hsv_u16_to_channels<
let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };
let dst_slice = unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) };

for x in cx..width as usize {
for x in _cx..width as usize {
let px = x * channels;
let h = unsafe { *src_slice.get_unchecked(px) };
let s = unsafe { *src_slice.get_unchecked(px + 1) };
Expand Down
30 changes: 29 additions & 1 deletion src/image_to_hsv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ use crate::image_to_hsv_support::HsvTarget;
))]
use crate::neon::neon_channels_to_hsv_u16;
use crate::Rgb;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::sse_channels_to_hsv_u16;

#[inline(always)]
#[inline]
fn channels_to_hsv_u16<
const CHANNELS_CONFIGURATION: u8,
const USE_ALPHA: bool,
Expand All @@ -31,6 +33,17 @@ fn channels_to_hsv_u16<
}
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
let mut _has_sse = false;

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "sse4.1"
))]
if is_x86_feature_detected!("sse4.1") {
_has_sse = true;
}

let mut src_offset = 0usize;
let mut dst_offset = 0usize;

Expand All @@ -40,6 +53,21 @@ fn channels_to_hsv_u16<
#[allow(unused_mut)]
let mut cx = 0usize;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
if _has_sse {
cx = sse_channels_to_hsv_u16::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
cx,
src.as_ptr(),
src_offset,
width,
dst.as_mut_ptr(),
dst_offset,
scale,
)
}
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
Expand Down
6 changes: 1 addition & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,4 @@ pub use hsv_to_image::*;
pub use image_to_linear_u8::*;
pub use linear_to_image_u8::*;

pub use rgb_expand::*;
pub use neon::neon_rgb_to_hsv;
pub use neon::neon_rgb_to_hsl;
pub use neon::neon_hsv_to_rgb;
pub use neon::neon_hsl_to_rgb;
pub use rgb_expand::*;
1 change: 0 additions & 1 deletion src/luv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ const D65_XYZ: [f32; 3] = [95.047f32, 100.0f32, 108.883f32];
use crate::rgb::Rgb;
use crate::rgba::Rgba;
use crate::xyz::Xyz;
use clap::Parser;

pub(crate) const LUV_WHITE_U_PRIME: f32 =
4.0f32 * D65_XYZ[1] / (D65_XYZ[0] + 15.0 * D65_XYZ[1] + 3.0 * D65_XYZ[2]);
Expand Down
8 changes: 8 additions & 0 deletions src/neon/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
mod neon_colors;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
mod neon_gamma_curves;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
mod neon_image_to_hsv;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
Expand Down
8 changes: 0 additions & 8 deletions src/neon/neon_colors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,6 @@ pub unsafe fn neon_hsv_to_rgb(
(vcvtaq_u32_f32(r), vcvtaq_u32_f32(g), vcvtaq_u32_f32(b))
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
#[inline(always)]
pub unsafe fn neon_rgb_to_hsv(
r: uint32x4_t,
Expand Down Expand Up @@ -209,10 +205,6 @@ pub unsafe fn neon_rgb_to_hsv(
(h, vmulq_f32(s, scale), vmulq_f32(v, scale))
}

#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
#[inline(always)]
pub unsafe fn neon_rgb_to_hsl(
r: uint32x4_t,
Expand Down
2 changes: 1 addition & 1 deletion src/neon/neon_hsv_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::image_to_hsv_support::HsvTarget;
any(target_arch = "aarch64", target_arch = "arm"),
target_feature = "neon"
))]
#[inline(always)]
#[inline]
pub unsafe fn neon_hsv_u16_to_image<
const CHANNELS_CONFIGURATION: u8,
const USE_ALPHA: bool,
Expand Down
10 changes: 9 additions & 1 deletion src/sse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ mod sse_xyza_laba_to_image;
mod sse_color;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
mod sse_xyz_lab_to_image;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
mod sse_image_to_hsv;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
mod sse_hsv_to_image;

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use sse_image_to_linear_u8::*;
Expand All @@ -49,4 +53,8 @@ pub use sse_xyza_laba_to_image::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use sse_xyz_lab_to_image::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use sse_linear_to_image::*;
pub use sse_linear_to_image::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use sse_image_to_hsv::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use sse_hsv_to_image::*;
Loading

0 comments on commit 226dc54

Please sign in to comment.