Skip to content

Commit

Permalink
Increase speed and precision of cube root, unaligned memory refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 16, 2024
1 parent b26bda0 commit c92719f
Show file tree
Hide file tree
Showing 17 changed files with 127 additions and 135 deletions.
4 changes: 2 additions & 2 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn main() {
lab_store.resize(width as usize * components * height as usize, 0f32);
let src_stride = width * components as u32;
let start_time = Instant::now();
rgb_to_lab(
rgb_to_sigmoidal(
src_bytes,
src_stride,
&mut lab_store,
Expand Down Expand Up @@ -92,7 +92,7 @@ fn main() {
// }

let start_time = Instant::now();
lab_to_srgb(
sigmoidal_to_rgb(
&lab_store,
store_stride as u32,
&mut dst_slice,
Expand Down
15 changes: 6 additions & 9 deletions src/concat_alpha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
#[allow(unused_imports)]
use std::arch::x86_64::*;
#[allow(unused_imports)]
use std::slice;

/// Adds an alpha plane to an existing RGB/XYZ/LAB or other 3-plane image. The image becomes RGBA, XYZa, LABa, etc.
pub fn append_alpha(
Expand Down Expand Up @@ -69,11 +67,8 @@ pub fn append_alpha(
let mut _cx = 0usize;

let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 };
let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * 3usize) };
let a_ptr = unsafe { (a_plane.as_ptr() as *const u8).add(a_offset) as *const f32 };
let a_slice = unsafe { slice::from_raw_parts(a_ptr, width as usize) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };
let dst_slice = unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * 4) };

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
Expand Down Expand Up @@ -147,10 +142,12 @@ pub fn append_alpha(
unsafe {
let px = x * 4;
let s_x = x * 3;
*dst_slice.get_unchecked_mut(px) = *src_slice.get_unchecked(s_x);
*dst_slice.get_unchecked_mut(px + 1) = *src_slice.get_unchecked(s_x + 1);
*dst_slice.get_unchecked_mut(px + 2) = *src_slice.get_unchecked(s_x + 2);
*dst_slice.get_unchecked_mut(px + 3) = *a_slice.get_unchecked(x);
let dst = dst_ptr.add(px);
let src = src_ptr.add(s_x);
dst.write_unaligned(src.read_unaligned());
dst.add(1).write_unaligned(src.add(1).read_unaligned());
dst.add(2).write_unaligned(src.add(2).read_unaligned());
dst.add(3).write_unaligned(a_ptr.add(x).read_unaligned());
}
}

Expand Down
17 changes: 9 additions & 8 deletions src/image_to_hsv.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::slice;

use crate::image::ImageConfiguration;
use crate::image_to_hsv_support::HsvTarget;
#[cfg(all(
Expand Down Expand Up @@ -96,18 +94,20 @@ fn channels_to_hsv_u16<
let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut u16 };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };

for x in cx..width as usize {
let px = x * channels;
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
src.add(image_configuration.get_r_channel_offset())
.read_unaligned()
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
src.add(image_configuration.get_g_channel_offset())
.read_unaligned()
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
src.add(image_configuration.get_b_channel_offset())
.read_unaligned()
};

let rgb = Rgb::<u8>::new(r, g, b);
Expand All @@ -134,7 +134,8 @@ fn channels_to_hsv_u16<

if image_configuration.has_alpha() {
let a = unsafe {
*src_slice.get_unchecked(hx + image_configuration.get_a_channel_offset())
src.add(image_configuration.get_a_channel_offset())
.read_unaligned()
};
unsafe {
dst.add(3).write_unaligned(a as u16);
Expand Down
16 changes: 9 additions & 7 deletions src/image_to_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use crate::neon::neon_channels_to_linear;
))]
use crate::sse::*;
use crate::Rgb;
use std::slice;

#[inline(always)]
fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down Expand Up @@ -128,19 +127,21 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };

for x in _cx..width as usize {
let px = x * channels;
let dst = unsafe { dst_ptr.add(px) };
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
src.add(image_configuration.get_r_channel_offset())
.read_unaligned()
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
src.add(image_configuration.get_g_channel_offset())
.read_unaligned()
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
src.add(image_configuration.get_b_channel_offset())
.read_unaligned()
};

let rgb = Rgb::<u8>::new(r, g, b);
Expand All @@ -154,7 +155,8 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(

if USE_ALPHA && image_configuration.has_alpha() {
let a = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_a_channel_offset())
src.add(image_configuration.get_a_channel_offset())
.read_unaligned()
};
let a_lin = a as f32 * (1f32 / 255f32);
unsafe {
Expand Down
13 changes: 6 additions & 7 deletions src/image_to_linear_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,14 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
};

let rgb = Rgb::<u8>::new(r, g, b);
let rgb_f32 = rgb.to_rgb_f32();
let mut rgb_f32 = rgb.to_rgb_f32();
rgb_f32 = rgb_f32.apply(transfer);
let rgb = rgb_f32.to_u8();

unsafe {
let t_r = (transfer(rgb_f32.r) * 255f32).min(255f32).max(0f32) as u8;
let t_g = (transfer(rgb_f32.g) * 255f32).min(255f32).max(0f32) as u8;
let t_b = (transfer(rgb_f32.b) * 255f32).min(255f32).max(0f32) as u8;
*dst_slice.get_unchecked_mut(px) = t_r;
*dst_slice.get_unchecked_mut(px + 1) = t_g;
*dst_slice.get_unchecked_mut(px + 2) = t_b;
*dst_slice.get_unchecked_mut(px) = rgb.r;
*dst_slice.get_unchecked_mut(px + 1) = rgb.g;
*dst_slice.get_unchecked_mut(px + 2) = rgb.b;
}

if USE_ALPHA && image_configuration.has_alpha() {
Expand Down
15 changes: 6 additions & 9 deletions src/image_to_sigmoidal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
target_feature = "avx2"
))]
use crate::avx::avx_image_to_sigmoidal_row;
use std::slice;

use crate::image::ImageConfiguration;
#[cfg(all(
Expand Down Expand Up @@ -75,8 +74,6 @@ fn image_to_sigmoidal<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };

#[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
target_feature = "avx2"
Expand Down Expand Up @@ -114,20 +111,20 @@ fn image_to_sigmoidal<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(

for x in cx..width as usize {
let px = x * channels;
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
src.add(image_configuration.get_r_channel_offset()).read_unaligned()
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
src.add(image_configuration.get_g_channel_offset()).read_unaligned()
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
src.add(image_configuration.get_b_channel_offset()).read_unaligned()
};

let rgb = Rgb::<u8>::new(r, g, b);
let hx = x * channels;

let writing_ptr = unsafe { dst_ptr.add(hx) };
let writing_ptr = unsafe { dst_ptr.add(px) };

let sigmoidal = rgb.to_sigmoidal();
unsafe {
Expand All @@ -138,7 +135,7 @@ fn image_to_sigmoidal<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(

if image_configuration.has_alpha() {
let a = unsafe {
*src_slice.get_unchecked(hx + image_configuration.get_a_channel_offset())
src.add(image_configuration.get_a_channel_offset()).read_unaligned()
} as f32
* COLOR_SCALE;

Expand Down
16 changes: 9 additions & 7 deletions src/image_to_xyz_lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use crate::neon::neon_channels_to_xyz_or_lab;
))]
use crate::sse::sse_channels_to_xyz_or_lab;
use crate::{Rgb, Xyz, SRGB_TO_XYZ_D65};
use std::slice;

pub(crate) enum XyzTarget {
LAB = 0,
Expand Down Expand Up @@ -205,18 +204,20 @@ fn channels_to_xyz<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool, cons
let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };

for x in cx..width as usize {
let px = x * channels;
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
src.add(image_configuration.get_r_channel_offset())
.read_unaligned()
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
src.add(image_configuration.get_g_channel_offset())
.read_unaligned()
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
src.add(image_configuration.get_b_channel_offset())
.read_unaligned()
};

let rgb = Rgb::<u8>::new(r, g, b);
Expand Down Expand Up @@ -252,7 +253,8 @@ fn channels_to_xyz<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool, cons

if USE_ALPHA && image_configuration.has_alpha() {
let a = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_a_channel_offset())
src.add(image_configuration.get_a_channel_offset())
.read_unaligned()
};
let a_lin = a as f32 * (1f32 / 255f32);
let a_ptr =
Expand Down
16 changes: 9 additions & 7 deletions src/image_xyza_laba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use crate::neon::neon_channels_to_xyza_or_laba;
))]
use crate::sse::sse_channels_to_xyza_laba;
use crate::{Rgb, TransferFunction, Xyz, SRGB_TO_XYZ_D65};
use std::slice;

#[inline(always)]
fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
Expand Down Expand Up @@ -97,18 +96,20 @@ fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8
let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };

for x in cx..width as usize {
let px = x * channels;
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
src.add(image_configuration.get_r_channel_offset())
.read_unaligned()
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
src.add(image_configuration.get_g_channel_offset())
.read_unaligned()
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
src.add(image_configuration.get_b_channel_offset())
.read_unaligned()
};

let rgb = Rgb::<u8>::new(r, g, b);
Expand Down Expand Up @@ -142,7 +143,8 @@ fn channels_to_xyz_with_alpha<const CHANNELS_CONFIGURATION: u8, const TARGET: u8
}

let a = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_a_channel_offset())
src.add(image_configuration.get_a_channel_offset())
.read_unaligned()
};
let a_lin = a as f32 * (1f32 / 255f32);
unsafe {
Expand Down
17 changes: 9 additions & 8 deletions src/linear_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use crate::neon::neon_linear_to_gamma;
))]
use crate::sse::sse_linear_to_gamma;
use crate::Rgb;
use std::slice;

#[inline(always)]
fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down Expand Up @@ -128,8 +127,6 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 };
let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };

let dst_slice = unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) };

for x in _cx..width as usize {
let px = x * channels;
let src_slice = unsafe { src_ptr.add(px) };
Expand All @@ -155,10 +152,14 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
b.min(1f32).max(0f32),
);

let dst = unsafe { dst_ptr.add(px) };

unsafe {
*dst_slice.get_unchecked_mut(px) = (transfer(rgb.r) * 255f32) as u8;
*dst_slice.get_unchecked_mut(px + 1) = (transfer(rgb.g) * 255f32) as u8;
*dst_slice.get_unchecked_mut(px + 2) = (transfer(rgb.b) * 255f32) as u8;
// Scale to the 0..=255 range first and round afterwards, in the same order as
// the alpha channel conversion further down. Rounding the transfer output
// (expected to lie in 0..=1) before scaling would collapse every channel to
// either 0 or 255.
dst.write_unaligned((transfer(rgb.r) * 255f32).round() as u8);
dst.add(1)
    .write_unaligned((transfer(rgb.g) * 255f32).round() as u8);
dst.add(2)
    .write_unaligned((transfer(rgb.b) * 255f32).round() as u8);
}

if USE_ALPHA && image_configuration.has_alpha() {
Expand All @@ -167,9 +168,9 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
.add(image_configuration.get_a_channel_offset())
.read_unaligned()
};
let a_lin = (a * 255f32) as u8;
let a_lin = (a * 255f32).round() as u8;
unsafe {
*dst_slice.get_unchecked_mut(px + 3) = a_lin;
dst.add(3).write_unaligned(a_lin);
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/linear_to_image_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,15 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
};

let rgb = Rgb::<u8>::new(r, g, b);
let rgb = rgb.to_rgb_f32();
let mut rgb = rgb.to_rgb_f32();

rgb = rgb.apply(transfer);
let new_rgb = rgb.to_u8();

unsafe {
*dst_slice.get_unchecked_mut(px) = (transfer(rgb.r) * 255f32) as u8;
*dst_slice.get_unchecked_mut(px + 1) = (transfer(rgb.g) * 255f32) as u8;
*dst_slice.get_unchecked_mut(px + 2) = (transfer(rgb.b) * 255f32) as u8;
*dst_slice.get_unchecked_mut(px) = new_rgb.r;
*dst_slice.get_unchecked_mut(px + 1) = new_rgb.g;
*dst_slice.get_unchecked_mut(px + 2) = new_rgb.b;
}

if USE_ALPHA && image_configuration.has_alpha() {
Expand Down
4 changes: 0 additions & 4 deletions src/luv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,14 @@ impl Luv {
}

impl LCh {
#[allow(dead_code)]
pub fn from_rgb(rgb: &Rgb<u8>) -> Self {
LCh::from_luv(Luv::from_rgb(rgb))
}

#[allow(dead_code)]
pub fn from_rgba(rgba: &Rgba<u8>) -> Self {
LCh::from_luv(Luv::from_rgba(rgba))
}

#[allow(dead_code)]
pub fn new(l: f32, c: f32, h: f32) -> Self {
LCh { l, c, h }
}
Expand All @@ -144,7 +141,6 @@ impl LCh {
}
}

/// Converts this value to 8-bit RGB by calling `to_luv` and converting the result.
#[allow(dead_code)]
pub fn to_rgb(&self) -> Rgb<u8> {
    let luv = self.to_luv();
    luv.to_rgb()
}
Expand Down
Loading

0 comments on commit c92719f

Please sign in to comment.