From b4e35b23063e1a896b24d82ccafeef283f1d6a36 Mon Sep 17 00:00:00 2001 From: awxkee Date: Fri, 11 Oct 2024 00:19:32 +0100 Subject: [PATCH] Big reworking with speed increasing --- src/gamma_curves.rs | 10 +++++ src/image_to_linear.rs | 89 +++++++++++++++++++-------------------- src/linear_to_image_u8.rs | 2 - src/planar_to_linear.rs | 2 +- 4 files changed, 54 insertions(+), 49 deletions(-) diff --git a/src/gamma_curves.rs b/src/gamma_curves.rs index 62ca494..85f5a2d 100644 --- a/src/gamma_curves.rs +++ b/src/gamma_curves.rs @@ -289,6 +289,12 @@ pub fn hlg_from_linear(linear: f32) -> f32 { } } +#[inline] +/// Linear (identity) transfer function; clamps the value into the [0, 1] range +pub fn trc_linear(v: f32) -> f32 { + v.min(1.).max(0.) +} + #[repr(C)] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] /// Declares transfer function for transfer components into a linear colorspace and its inverse @@ -315,6 +321,8 @@ pub enum TransferFunction { Pq, /// HLG (Hybrid log gamma) Transfer function Hlg, + /// Linear transfer function + Linear, } impl From for TransferFunction { @@ -352,6 +360,7 @@ impl TransferFunction { TransferFunction::Smpte240 => smpte240_to_linear(v), TransferFunction::Pq => pq_to_linear(v), TransferFunction::Hlg => hlg_to_linear(v), + TransferFunction::Linear => trc_linear(v), } } @@ -369,6 +378,7 @@ impl TransferFunction { TransferFunction::Smpte240 => smpte240_from_linear(v), TransferFunction::Pq => pq_from_linear(v), TransferFunction::Hlg => hlg_from_linear(v), + TransferFunction::Linear => trc_linear(v), } } } diff --git a/src/image_to_linear.rs b/src/image_to_linear.rs index 0d46c98..71441d0 100644 --- a/src/image_to_linear.rs +++ b/src/image_to_linear.rs @@ -11,7 +11,6 @@ use crate::Rgb; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[allow(clippy::type_complexity)] @@ -36,65 +35,63 @@ fn channels_to_linear( lut_table[i] =
transfer_function.linearize(i as f32 * (1. / 255.0)); } + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + for (dst_row, src_row) in dst_slice_safe_align + .chunks_exact_mut(dst_stride as usize) + .zip(src.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + let src_ptr = src_row.as_ptr(); + let dst_ptr = dst_row.as_mut_ptr() as *mut f32; - for x in _cx..width as usize { - let px = x * channels; - let dst = unsafe { dst_ptr.add(px) }; - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; + for x in _cx..width as usize { + let px = x * channels; + let dst = dst_ptr.add(px); + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); - let rgb = Rgb::::new(r, g, b); + let rgb = Rgb::::new(r, g, b); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(*lut_table.get_unchecked(rgb.g as usize)); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(*lut_table.get_unchecked(rgb.b as usize)); + 
dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.g as usize)); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.b as usize)); - if USE_ALPHA && image_configuration.has_alpha() { - let a = src - .add(image_configuration.get_a_channel_offset()) - .read_unaligned(); - let a_lin = a as f32 * (1f32 / 255f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_lin); + if USE_ALPHA && image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_lin); + } } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } #[cfg(feature = "rayon")] { - let dst_slice_safe_align = unsafe { - slice::from_raw_parts_mut( - dst.as_mut_ptr() as *mut u8, - dst_stride as usize * height as usize, - ) - }; dst_slice_safe_align .par_chunks_exact_mut(dst_stride as usize) .zip(src.par_chunks_exact(src_stride as usize)) diff --git a/src/linear_to_image_u8.rs b/src/linear_to_image_u8.rs index f81fd8f..f0167ad 100644 --- a/src/linear_to_image_u8.rs +++ b/src/linear_to_image_u8.rs @@ -12,8 +12,6 @@ use crate::Rgb; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(not(feature = "rayon"))] -use std::slice; #[allow(clippy::type_complexity)] fn linear_to_gamma_channels( diff --git a/src/planar_to_linear.rs b/src/planar_to_linear.rs index 51552d6..388f85b 100644 --- a/src/planar_to_linear.rs +++ b/src/planar_to_linear.rs @@ -73,7 +73,7 @@ fn channels_to_linear( let px = x; let dst = dst_ptr.add(px); let src = src_ptr.add(px); - let transferred = 
*lut_table.get_unchecked(src.read_unaligned()); + let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize); dst.write_unaligned(transferred); }