Skip to content

Commit

Permalink
Big rework with speed improvements
Browse files: browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 10, 2024
1 parent fb21ab3 commit b903703
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 113 deletions.
4 changes: 2 additions & 2 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fn main() {
lab_store.resize(width as usize * components * height as usize, 0.);
let src_stride = width * components as u32;
let start_time = Instant::now();
bgr_to_lalphabeta(
bgr_to_oklab(
src_bytes,
src_stride,
&mut lab_store,
Expand Down Expand Up @@ -103,7 +103,7 @@ fn main() {
// }

let start_time = Instant::now();
lalphabeta_to_bgr(
oklab_to_bgr(
&lab_store,
store_stride as u32,
&mut dst_slice,
Expand Down
42 changes: 22 additions & 20 deletions src/image_to_linear_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,26 +41,28 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
.chunks_exact_mut(dst_stride as usize)
.zip(l_src.chunks_exact(src_stride as usize))
{
let mut _cx = 0usize;

for x in _cx..width as usize {
let px = x * channels;
let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset());
let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset());
let b = *src_row.get_unchecked(px + image_configuration.get_b_channel_offset());

let rgb = Rgb::<u8>::new(r, g, b);

*dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) =
*lut_table.get_unchecked(rgb.r as usize);
*dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) =
*lut_table.get_unchecked(rgb.g as usize);
*dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) =
*lut_table.get_unchecked(rgb.b as usize);

if USE_ALPHA && image_configuration.has_alpha() {
let a = *src_row.get_unchecked(px + image_configuration.get_a_channel_offset());
*dst_row.get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a;
unsafe {
let mut _cx = 0usize;

for x in _cx..width as usize {
let px = x * channels;
let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset());
let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset());
let b = *src_row.get_unchecked(px + image_configuration.get_b_channel_offset());

let rgb = Rgb::<u8>::new(r, g, b);

*dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) =
*lut_table.get_unchecked(rgb.r as usize);
*dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) =
*lut_table.get_unchecked(rgb.g as usize);
*dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) =
*lut_table.get_unchecked(rgb.b as usize);

if USE_ALPHA && image_configuration.has_alpha() {
let a = *src_row.get_unchecked(px + image_configuration.get_a_channel_offset());
*dst_row.get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a;
}
}
}
}
Expand Down
75 changes: 49 additions & 26 deletions src/lalphabeta_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,42 +102,65 @@ fn lalphabeta_to_image<const CHANNELS_CONFIGURATION: u8>(

#[cfg(not(feature = "rayon"))]
{
let mut src_offset = 0usize;
let mut dst_offset = 0usize;
for (dst, src) in dst
.chunks_exact_mut(dst_stride as usize)
.zip(src_slice_safe_align.chunks_exact(src_stride as usize))
{
unsafe {
let mut _cx = 0usize;

for _ in 0..height as usize {
let mut _cx = 0usize;
let src_ptr = src.as_ptr() as *mut f32;

let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 };
let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };
let mut transient_row = vec![0f32; width as usize * channels];

for x in _cx..width as usize {
let px = x * channels;
let l_x = unsafe { src_ptr.add(px).read_unaligned() };
let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() };
let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() };
let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z);
let rgb = lalphabeta.to_rgb(transfer_function);
for x in _cx..width as usize {
let px = x * channels;
let l_x = src_ptr.add(px).read_unaligned();
let l_y = src_ptr.add(px + 1).read_unaligned();
let l_z = src_ptr.add(px + 2).read_unaligned();
let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z);
let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65);

unsafe {
let dst = dst_ptr.add(x * channels);
dst.add(image_configuration.get_r_channel_offset())
.write_unaligned(rgb.r);
dst.add(image_configuration.get_g_channel_offset())
.write_unaligned(rgb.g);
dst.add(image_configuration.get_b_channel_offset())
.write_unaligned(rgb.b);
let dst = transient_row.get_unchecked_mut((x * channels)..);
*dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r;
*dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g;
*dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b;
if image_configuration.has_alpha() {
let l_a = src_ptr.add(px + 3).read_unaligned();
let a_value = (l_a * 255f32).max(0f32);
dst.add(image_configuration.get_a_channel_offset())
.write_unaligned(a_value as u8);
*dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) =
a_value;
}
}
}

src_offset += src_stride as usize;
dst_offset += dst_stride as usize;
for (dst, src) in dst
.chunks_exact_mut(channels)
.zip(transient_row.chunks_exact(channels))
{
let r = src[image_configuration.get_r_channel_offset()];
let g = src[image_configuration.get_g_channel_offset()];
let b = src[image_configuration.get_b_channel_offset()];

let rgb = (Rgb::<f32>::new(
r.min(1f32).max(0f32),
g.min(1f32).max(0f32),
b.min(1f32).max(0f32),
) * Rgb::<f32>::dup(2048f32))
.round()
.cast::<u16>();

dst[image_configuration.get_r_channel_offset()] =
*lut_table.get_unchecked(rgb.r.min(2048) as usize);
dst[image_configuration.get_g_channel_offset()] =
*lut_table.get_unchecked(rgb.g.min(2048) as usize);
dst[image_configuration.get_b_channel_offset()] =
*lut_table.get_unchecked(rgb.b.min(2048) as usize);
if image_configuration.has_alpha() {
dst[image_configuration.get_a_channel_offset()] =
src[image_configuration.get_a_channel_offset()] as u8;
}
}
}
}
}
}
Expand Down
20 changes: 11 additions & 9 deletions src/linear_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b
let mut lut_table = vec![0u8; 2049];
for i in 0..2049 {
lut_table[i] = (transfer_function.gamma(i as f32 * (1. / 2048.0)) * 255.)
.ceil()
.round()
.min(255.) as u8;
}

Expand Down Expand Up @@ -77,18 +77,19 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b

let dst = dst_ptr.add(px);

dst.write_unaligned(*lut_table.get_unchecked(rgb.r.min(2048) as usize));
dst.add(1)
dst.add(image_configuration.get_r_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.r.min(2048) as usize));
dst.add(image_configuration.get_g_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.g.min(2048) as usize));
dst.add(2)
dst.add(image_configuration.get_b_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.b.min(2048) as usize));

if USE_ALPHA && image_configuration.has_alpha() {
let a = src_slice
.add(image_configuration.get_a_channel_offset())
.read_unaligned();
let a_lin = (a * 255f32).round() as u8;
dst.add(3).write_unaligned(a_lin);
dst.add(image_configuration.get_a_channel_offset()).write_unaligned(a_lin);
}
}
});
Expand Down Expand Up @@ -129,18 +130,19 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b

let dst = dst_ptr.add(px);

dst.write_unaligned(*lut_table.get_unchecked(rgb.r.min(2048) as usize));
dst.add(1)
dst.add(image_configuration.get_r_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.r.min(2048) as usize));
dst.add(image_configuration.get_g_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.g.min(2048) as usize));
dst.add(2)
dst.add(image_configuration.get_b_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.b.min(2048) as usize));

if USE_ALPHA && image_configuration.has_alpha() {
let a = src_slice
.add(image_configuration.get_a_channel_offset())
.read_unaligned();
let a_lin = (a * 255f32).round() as u8;
dst.add(3).write_unaligned(a_lin);
dst.add(image_configuration.get_a_channel_offset()).write_unaligned(a_lin);
}
}
}
Expand Down
83 changes: 35 additions & 48 deletions src/linear_to_image_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,68 +54,55 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: b

let rgb = Rgb::<u8>::new(r, g, b);

*dst.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize);
*dst.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize);
*dst.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize);
let dst = dst.get_unchecked_mut(px..);

*dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) =
*lut_table.get_unchecked(rgb.r as usize);
*dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) =
*lut_table.get_unchecked(rgb.g as usize);
*dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) =
*lut_table.get_unchecked(rgb.b as usize);

if USE_ALPHA && image_configuration.has_alpha() {
let a = src.get_unchecked(px + image_configuration.get_a_channel_offset());
*dst.get_unchecked_mut(px + 3) = *a;
*dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = *a;
}
}
});
}

#[cfg(not(feature = "rayon"))]
{
let mut src_offset = 0usize;
let mut dst_offset = 0usize;

for _ in 0.._height as usize {
let mut _cx = 0usize;

let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };

let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) };
let dst_slice =
unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) };

for x in _cx..width as usize {
let px = x * channels;
let r = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_r_channel_offset())
};
let g = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_g_channel_offset())
};
let b = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_b_channel_offset())
};

let rgb = Rgb::<u8>::new(r, g, b);
let mut rgb = rgb.to_rgb_f32();

rgb = rgb.gamma(transfer_function);

unsafe {
*dst_slice.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize);
*dst_slice.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize);
*dst_slice.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize);
}
for (dst, src) in dst
.chunks_exact_mut(dst_stride as usize)
.zip(src.chunks_exact(src_stride as usize))
{
unsafe {
let mut _cx = 0usize;

for x in _cx..width as usize {
let px = x * channels;
let r = *src.get_unchecked(px + image_configuration.get_r_channel_offset());
let g = *src.get_unchecked(px + image_configuration.get_g_channel_offset());
let b = *src.get_unchecked(px + image_configuration.get_b_channel_offset());

let rgb = Rgb::<u8>::new(r, g, b);

let dst = dst.get_unchecked_mut(px..);

if USE_ALPHA && image_configuration.has_alpha() {
let a = unsafe {
*src_slice.get_unchecked(px + image_configuration.get_a_channel_offset())
};
unsafe {
*dst_slice.get_unchecked_mut(px + 3) = a;
*dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) =
*lut_table.get_unchecked(rgb.r as usize);
*dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) =
*lut_table.get_unchecked(rgb.g as usize);
*dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) =
*lut_table.get_unchecked(rgb.b as usize);

if USE_ALPHA && image_configuration.has_alpha() {
let a = src.get_unchecked(px + image_configuration.get_a_channel_offset());
*dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = *a;
}
}
}

src_offset += src_stride as usize;
dst_offset += dst_stride as usize;
}
}
}
Expand Down
30 changes: 22 additions & 8 deletions src/oklab_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,17 @@ fn oklab_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
.zip(transient_row.chunks_exact_mut(channels))
{
let rgb = (Rgb::<f32>::new(
src_chunks[image_configuration.get_r_channel_offset()],
src_chunks[image_configuration.get_g_channel_offset()],
src_chunks[image_configuration.get_b_channel_offset()],
src_chunks[image_configuration.get_r_channel_offset()]
.max(0.)
.min(1.),
src_chunks[image_configuration.get_g_channel_offset()]
.max(0.)
.min(1.),
src_chunks[image_configuration.get_b_channel_offset()]
.max(0.)
.min(1.),
) * Rgb::<f32>::dup(2048f32))
.round()
.cast::<u16>();

dst_chunks[image_configuration.get_r_channel_offset()] =
Expand Down Expand Up @@ -183,11 +190,18 @@ fn oklab_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
.chunks_exact_mut(channels)
.zip(transient_row.chunks_exact_mut(channels))
{
let rgb = (Rgb::<f32>::new(
src_chunks[image_configuration.get_r_channel_offset()],
src_chunks[image_configuration.get_g_channel_offset()],
src_chunks[image_configuration.get_b_channel_offset()],
) * Rgb::<f32>::dup(2048f32))
let rgb = (Rgb::new(
src_chunks[image_configuration.get_r_channel_offset()]
.max(0.)
.min(1.),
src_chunks[image_configuration.get_g_channel_offset()]
.max(0.)
.min(1.),
src_chunks[image_configuration.get_b_channel_offset()]
.max(0.)
.min(1.),
) * Rgb::dup(2048f32))
.round()
.cast::<u16>();

dst_chunks[image_configuration.get_r_channel_offset()] =
Expand Down

0 comments on commit b903703

Please sign in to comment.