Skip to content

Commit

Permalink
Big reworking with speed increasing
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 10, 2024
1 parent bddeaae commit b4e35b2
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 49 deletions.
10 changes: 10 additions & 0 deletions src/gamma_curves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,12 @@ pub fn hlg_from_linear(linear: f32) -> f32 {
}
}

/// Linear (identity) transfer function.
///
/// Returns `v` unchanged apart from clamping it into the `[0, 1]` range, so it
/// can be used both for linearization and for the inverse (gamma) direction.
///
/// Values below `0` map to `0`, values above `1` map to `1`. A NaN input maps
/// to `0`, because `f32::max` / `f32::min` return the non-NaN operand.
#[inline]
pub fn trc_linear(v: f32) -> f32 {
    // Lower bound first, then upper bound: the previous `.min(1.).min(0.)`
    // applied `min` twice and collapsed every positive input to 0.
    v.max(0.).min(1.)
}

#[repr(C)]
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
/// Declares the transfer function used to convert components into a linear colorspace, and its inverse
Expand All @@ -315,6 +321,8 @@ pub enum TransferFunction {
Pq,
/// HLG (Hybrid log gamma) Transfer function
Hlg,
/// Linear transfer function
Linear,
}

impl From<u8> for TransferFunction {
Expand Down Expand Up @@ -352,6 +360,7 @@ impl TransferFunction {
TransferFunction::Smpte240 => smpte240_to_linear(v),
TransferFunction::Pq => pq_to_linear(v),
TransferFunction::Hlg => hlg_to_linear(v),
TransferFunction::Linear => trc_linear(v),
}
}

Expand All @@ -369,6 +378,7 @@ impl TransferFunction {
TransferFunction::Smpte240 => smpte240_from_linear(v),
TransferFunction::Pq => pq_from_linear(v),
TransferFunction::Hlg => hlg_from_linear(v),
TransferFunction::Linear => trc_linear(v),
}
}
}
89 changes: 43 additions & 46 deletions src/image_to_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use crate::Rgb;
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
#[cfg(feature = "rayon")]
use rayon::prelude::{ParallelSlice, ParallelSliceMut};
#[cfg(feature = "rayon")]
use std::slice;

#[allow(clippy::type_complexity)]
Expand All @@ -36,65 +35,63 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
lut_table[i] = transfer_function.linearize(i as f32 * (1. / 255.0));
}

let dst_slice_safe_align = unsafe {
slice::from_raw_parts_mut(
dst.as_mut_ptr() as *mut u8,
dst_stride as usize * height as usize,
)
};

#[cfg(not(feature = "rayon"))]
{
let mut src_offset = 0usize;
let mut dst_offset = 0usize;
for _ in 0..height as usize {
let mut _cx = 0usize;
for (dst_row, src_row) in dst_slice_safe_align
.chunks_exact_mut(dst_stride as usize)
.zip(src.chunks_exact(src_stride as usize))
{
unsafe {
let mut _cx = 0usize;

let src_ptr = unsafe { src.as_ptr().add(src_offset) };
let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 };
let src_ptr = src_row.as_ptr();
let dst_ptr = dst_row.as_mut_ptr() as *mut f32;

for x in _cx..width as usize {
let px = x * channels;
let dst = unsafe { dst_ptr.add(px) };
let src = unsafe { src_ptr.add(px) };
let r = unsafe {
src.add(image_configuration.get_r_channel_offset())
.read_unaligned()
};
let g = unsafe {
src.add(image_configuration.get_g_channel_offset())
.read_unaligned()
};
let b = unsafe {
src.add(image_configuration.get_b_channel_offset())
.read_unaligned()
};
for x in _cx..width as usize {
let px = x * channels;
let dst = dst_ptr.add(px);
let src = src_ptr.add(px);
let r = src
.add(image_configuration.get_r_channel_offset())
.read_unaligned();
let g = src
.add(image_configuration.get_g_channel_offset())
.read_unaligned();
let b = src
.add(image_configuration.get_b_channel_offset())
.read_unaligned();

let rgb = Rgb::<u8>::new(r, g, b);
let rgb = Rgb::<u8>::new(r, g, b);

dst.add(image_configuration.get_r_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.r as usize));
dst.add(image_configuration.get_g_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.g as usize));
dst.add(image_configuration.get_b_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.b as usize));
dst.add(image_configuration.get_r_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.r as usize));
dst.add(image_configuration.get_g_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.g as usize));
dst.add(image_configuration.get_b_channel_offset())
.write_unaligned(*lut_table.get_unchecked(rgb.b as usize));

if USE_ALPHA && image_configuration.has_alpha() {
let a = src
.add(image_configuration.get_a_channel_offset())
.read_unaligned();
let a_lin = a as f32 * (1f32 / 255f32);
dst.add(image_configuration.get_a_channel_offset())
.write_unaligned(a_lin);
if USE_ALPHA && image_configuration.has_alpha() {
let a = src
.add(image_configuration.get_a_channel_offset())
.read_unaligned();
let a_lin = a as f32 * (1f32 / 255f32);
dst.add(image_configuration.get_a_channel_offset())
.write_unaligned(a_lin);
}
}
}

src_offset += src_stride as usize;
dst_offset += dst_stride as usize;
}
}

#[cfg(feature = "rayon")]
{
let dst_slice_safe_align = unsafe {
slice::from_raw_parts_mut(
dst.as_mut_ptr() as *mut u8,
dst_stride as usize * height as usize,
)
};
dst_slice_safe_align
.par_chunks_exact_mut(dst_stride as usize)
.zip(src.par_chunks_exact(src_stride as usize))
Expand Down
2 changes: 0 additions & 2 deletions src/linear_to_image_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ use crate::Rgb;
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
#[cfg(feature = "rayon")]
use rayon::prelude::{ParallelSlice, ParallelSliceMut};
#[cfg(not(feature = "rayon"))]
use std::slice;

#[allow(clippy::type_complexity)]
fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
Expand Down
2 changes: 1 addition & 1 deletion src/planar_to_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fn channels_to_linear(
let px = x;
let dst = dst_ptr.add(px);
let src = src_ptr.add(px);
let transferred = *lut_table.get_unchecked(src.read_unaligned());
let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize);

dst.write_unaligned(transferred);
}
Expand Down

0 comments on commit b4e35b2

Please sign in to comment.