From fb21ab3e7eea56f2cede639d06fc6bf46f3f9b2e Mon Sep 17 00:00:00 2001 From: awxkee Date: Thu, 10 Oct 2024 23:45:25 +0100 Subject: [PATCH] Big reworking with speed increasing --- Cargo.lock | 2 +- src/app/src/main.rs | 10 +- src/avx/gamma_curves.rs | 27 +---- src/gamma_curves.rs | 214 +++++++++++++++++++++++++++++++++- src/image_to_jzazbz.rs | 211 +++++++++++++++++---------------- src/image_to_lalphabeta.rs | 162 +++++++++++++++----------- src/image_to_linear.rs | 35 +++--- src/image_to_linear_u8.rs | 45 ++++--- src/image_to_xyz_lab.rs | 68 ++++++++--- src/image_xyza_laba.rs | 104 +++++++++++++---- src/jzazbz_to_image.rs | 199 ++++++++++++++++++++----------- src/jzczhz.rs | 23 +++- src/lalphabeta.rs | 16 ++- src/lalphabeta_to_image.rs | 71 ++++++++--- src/linear_to_planar.rs | 102 +++++----------- src/neon/gamma_curves.rs | 31 +---- src/neon/image_to_jzazbz.rs | 220 ++--------------------------------- src/neon/jzazbz_to_image.rs | 208 +++------------------------------ src/neon/linear_to_planar.rs | 78 ------------- src/neon/mod.rs | 3 - src/neon/planar_to_linear.rs | 84 ------------- src/oklab_to_image.rs | 44 ++++--- src/planar_to_linear.rs | 84 ++++--------- src/sse/gamma_curves.rs | 29 +---- src/sse/image_to_jzazbz.rs | 212 +++------------------------------ src/sse/jzazbz_to_image.rs | 154 +++--------------------- src/sse/linear_to_planar.rs | 84 ------------- src/sse/planar_to_linear.rs | 92 --------------- src/xyz_lab_to_image.rs | 130 ++++++++++++++++----- src/xyza_laba_to_image.rs | 136 ++++++++++++++++------ 30 files changed, 1151 insertions(+), 1727 deletions(-) delete mode 100644 src/neon/linear_to_planar.rs delete mode 100644 src/neon/planar_to_linear.rs delete mode 100644 src/sse/linear_to_planar.rs delete mode 100644 src/sse/planar_to_linear.rs diff --git a/Cargo.lock b/Cargo.lock index 9a7e897..ccd37b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -169,7 +169,7 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] 
name = "colorutils-rs" -version = "0.6.2" +version = "0.7.0" dependencies = [ "erydanos", "half", diff --git a/src/app/src/main.rs b/src/app/src/main.rs index 0b52aab..bf7c58f 100644 --- a/src/app/src/main.rs +++ b/src/app/src/main.rs @@ -68,15 +68,14 @@ fn main() { lab_store.resize(width as usize * components * height as usize, 0.); let src_stride = width * components as u32; let start_time = Instant::now(); - rgb_to_jzazbz( + bgr_to_lalphabeta( src_bytes, src_stride, &mut lab_store, store_stride as u32, width, height, - 200., - TransferFunction::Srgb, + TransferFunction::Pq, ); let elapsed_time = start_time.elapsed(); // Print the elapsed time in milliseconds @@ -104,15 +103,14 @@ fn main() { // } let start_time = Instant::now(); - jzazbz_to_rgb( + lalphabeta_to_bgr( &lab_store, store_stride as u32, &mut dst_slice, src_stride, width, height, - 200., - TransferFunction::Srgb, + TransferFunction::Pq, ); let elapsed_time = start_time.elapsed(); diff --git a/src/avx/gamma_curves.rs b/src/avx/gamma_curves.rs index 76898df..e7d8e43 100644 --- a/src/avx/gamma_curves.rs +++ b/src/avx/gamma_curves.rs @@ -4,7 +4,7 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. 
*/ - +#![allow(dead_code)] use crate::avx::math::*; #[allow(unused_imports)] use crate::gamma_curves::TransferFunction; @@ -148,28 +148,3 @@ pub unsafe fn avx2_gamma2p2_from_linear(linear: __m256) -> __m256 { pub unsafe fn avx2_gamma2p8_from_linear(linear: __m256) -> __m256 { avx2_pure_gamma(linear, 1f32 / 2.8f32) } - -#[inline(always)] -pub unsafe fn perform_avx_gamma_transfer(transfer_function: TransferFunction, v: __m256) -> __m256 { - match transfer_function { - TransferFunction::Srgb => avx2_srgb_from_linear(v), - TransferFunction::Rec709 => avx2_rec709_from_linear(v), - TransferFunction::Gamma2p2 => avx2_gamma2p2_from_linear(v), - TransferFunction::Gamma2p8 => avx2_gamma2p8_from_linear(v), - TransferFunction::Smpte428 => avx2_smpte428_from_linear(v), - } -} - -#[inline(always)] -pub unsafe fn perform_avx2_linear_transfer( - transfer_function: TransferFunction, - v: __m256, -) -> __m256 { - match transfer_function { - TransferFunction::Srgb => avx2_srgb_to_linear(v), - TransferFunction::Rec709 => avx2_rec709_to_linear(v), - TransferFunction::Gamma2p2 => avx2_gamma2p2_to_linear(v), - TransferFunction::Gamma2p8 => avx2_gamma2p8_to_linear(v), - TransferFunction::Smpte428 => avx2_smpte428_to_linear(v), - } -} diff --git a/src/gamma_curves.rs b/src/gamma_curves.rs index 01324cb..62ca494 100644 --- a/src/gamma_curves.rs +++ b/src/gamma_curves.rs @@ -75,6 +75,110 @@ pub fn smpte428_from_linear(linear: f32) -> f32 { (0.91655527974030934f32 * linear.max(0.)).powf(POWER_VALUE) } +#[inline] +/// Linear transfer function for Smpte 240 +pub fn smpte240_to_linear(gamma: f32) -> f32 { + if gamma < 0.0 { + 0.0 + } else if gamma < 4.0 * 0.022821585529445 { + gamma / 4.0 + } else if gamma < 1.0 { + f32::powf((gamma + 0.111572195921731) / 1.111572195921731, 1.0 / 0.45) + } else { + 1.0 + } +} + +#[inline] +/// Gamma transfer function for Smpte 240 +pub fn smpte240_from_linear(linear: f32) -> f32 { + if linear < 0.0 { + 0.0 + } else if linear < 0.022821585529445 { + linear * 4.0 
+ } else if linear < 1.0 { + 1.111572195921731 * f32::powf(linear, 0.45) - 0.111572195921731 + } else { + 1.0 + } +} + +#[inline] +/// Gamma transfer function for Log100 +pub fn log100_from_linear(linear: f32) -> f32 { + if linear <= 0.01f32 { + 0. + } else { + 1. + linear.min(1.).log10() / 2.0 + } +} + +#[inline] +/// Linear transfer function for Log100 +pub fn log100_to_linear(gamma: f32) -> f32 { + // The function is non-bijective so choose the middle of [0, 0.00316227766f]. + const MID_INTERVAL: f32 = 0.01 / 2.; + if gamma <= 0. { + MID_INTERVAL + } else { + 10f32.powf(2. * (gamma.min(1.) - 1.)) + } +} + +#[inline] +/// Linear transfer function for Log100Sqrt10 +pub fn log100_sqrt10_to_linear(gamma: f32) -> f32 { + // The function is non-bijective so choose the middle of [0, 0.00316227766f]. + const MID_INTERVAL: f32 = 0.00316227766 / 2.; + if gamma <= 0. { + MID_INTERVAL + } else { + 10f32.powf(2.5 * (gamma.min(1.) - 1.)) + } +} + +#[inline] +/// Gamma transfer function for Log100Sqrt10 +pub fn log100_sqrt10_from_linear(linear: f32) -> f32 { + if linear <= 0.00316227766 { + 0.0 + } else { + 1.0 + linear.min(1.).log10() / 2.5 + } +} + +#[inline] +/// Gamma transfer function for Bt.1361 +pub fn bt1361_from_linear(linear: f32) -> f32 { + if linear < -0.25 { + -0.25 + } else if linear < 0.0 { + -0.27482420670236 * f32::powf(-4.0 * linear, 0.45) + 0.02482420670236 + } else if linear < 0.018053968510807 { + linear * 4.5 + } else if linear < 1.0 { + 1.09929682680944 * f32::powf(linear, 0.45) - 0.09929682680944 + } else { + 1.0 + } +} + +#[inline] +/// Linear transfer function for Bt.1361 +pub fn bt1361_to_linear(gamma: f32) -> f32 { + if gamma < -0.25 { + -0.25 + } else if gamma < 0.0 { + f32::powf((gamma - 0.02482420670236) / -0.27482420670236, 1.0 / 0.45) / -4.0 + } else if gamma < 4.5 * 0.018053968510807 { + gamma / 4.5 + } else if gamma < 1.0 { + f32::powf((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) + } else { + 1.0 + } +} + #[inline(always)] /// 
Pure gamma transfer function for gamma 2.2 pub fn pure_gamma_function(x: f32, gamma: f32) -> f32 { @@ -111,6 +215,80 @@ pub fn gamma2p8_to_linear(gamma: f32) -> f32 { pure_gamma_function(gamma, 2.8f32) } +#[inline] +/// Linear transfer function for PQ +pub fn pq_to_linear(gamma: f32) -> f32 { + if gamma > 0.0 { + let pow_gamma = f32::powf(gamma, 1.0 / 78.84375); + let num = (pow_gamma - 0.8359375).max(0.); + let den = (18.8515625 - 18.6875 * pow_gamma).max(f32::MIN_POSITIVE); + let linear = f32::powf(num / den, 1.0 / 0.1593017578125); + // Scale so that SDR white is 1.0 (extended SDR). + const PQ_MAX_NITS: f32 = 10000.; + const SDR_WHITE_NITS: f32 = 203.; + linear * PQ_MAX_NITS / SDR_WHITE_NITS + } else { + 0.0 + } +} + +#[inline] +/// Gamma transfer function for PQ +pub fn pq_from_linear(linear: f32) -> f32 { + const PQ_MAX_NITS: f32 = 10000.; + const SDR_WHITE_NITS: f32 = 203.; + + if linear > 0.0 { + // Scale from extended SDR range to [0.0, 1.0]. + let linear = (linear * SDR_WHITE_NITS / PQ_MAX_NITS).clamp(0., 1.); + let pow_linear = f32::powf(linear, 0.1593017578125); + let num = 0.1640625 * pow_linear - 0.1640625; + let den = 1.0 + 18.6875 * pow_linear; + f32::powf(1.0 + num / den, 78.84375) + } else { + 0.0 + } +} + +#[inline] +/// Linear transfer function for HLG +pub fn hlg_to_linear(gamma: f32) -> f32 { + const SDR_WHITE_NITS: f32 = 203.; + const HLG_WHITE_NITS: f32 = 1000.; + if gamma < 0.0 { + return 0.0; + } + let linear; + if gamma <= 0.5 { + linear = f32::powf((gamma * gamma) * (1.0 / 3.0), 1.2); + } else { + linear = f32::powf( + (f32::exp((gamma - 0.55991073) / 0.17883277) + 0.28466892) / 12.0, + 1.2, + ); + } + // Scale so that SDR white is 1.0 (extended SDR). + linear * HLG_WHITE_NITS / SDR_WHITE_NITS +} + +#[inline] +/// Gamma transfer function for HLG +pub fn hlg_from_linear(linear: f32) -> f32 { + const SDR_WHITE_NITS: f32 = 203.; + const HLG_WHITE_NITS: f32 = 1000.; + // Scale from extended SDR range to [0.0, 1.0]. 
+ let mut linear = (linear * (SDR_WHITE_NITS / HLG_WHITE_NITS)).clamp(0., 1.); + // Inverse OOTF followed by OETF see Table 5 and Note 5i in ITU-R BT.2100-2 page 7-8. + linear = f32::powf(linear, 1.0 / 1.2); + if linear < 0.0 { + 0.0 + } else if linear <= (1.0 / 12.0) { + f32::sqrt(3.0 * linear) + } else { + 0.17883277 * f32::ln(12.0 * linear - 0.28466892) + 0.55991073 + } +} + #[repr(C)] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] /// Declares transfer function for transfer components into a linear colorspace and its inverse @@ -119,12 +297,24 @@ pub enum TransferFunction { Srgb, /// Rec.709 Transfer function Rec709, - /// Pure gamma 2.2 Transfer function + /// Pure gamma 2.2 Transfer function, ITU-R 470M Gamma2p2, - /// Pure gamma 2.8 Transfer function + /// Pure gamma 2.8 Transfer function, ITU-R 470BG Gamma2p8, /// Smpte 428 Transfer function Smpte428, + /// Log100 Transfer function + Log100, + /// Log100Sqrt10 Transfer function + Log100Sqrt10, + /// Bt1361 Transfer function + Bt1361, + /// Smpte 240 Transfer function + Smpte240, + /// PQ Transfer function + Pq, + /// HLG (Hybrid log gamma) Transfer function + Hlg, } impl From for TransferFunction { @@ -136,6 +326,12 @@ impl From for TransferFunction { 2 => TransferFunction::Gamma2p2, 3 => TransferFunction::Gamma2p8, 4 => TransferFunction::Smpte428, + 5 => TransferFunction::Log100, + 6 => TransferFunction::Log100Sqrt10, + 7 => TransferFunction::Bt1361, + 8 => TransferFunction::Smpte240, + 9 => TransferFunction::Pq, + 10 => TransferFunction::Hlg, _ => TransferFunction::Srgb, } } @@ -150,6 +346,12 @@ impl TransferFunction { TransferFunction::Gamma2p8 => gamma2p8_to_linear(v), TransferFunction::Gamma2p2 => gamma2p2_to_linear(v), TransferFunction::Smpte428 => smpte428_to_linear(v), + TransferFunction::Log100 => log100_to_linear(v), + TransferFunction::Log100Sqrt10 => log100_sqrt10_to_linear(v), + TransferFunction::Bt1361 => bt1361_to_linear(v), + TransferFunction::Smpte240 => 
smpte240_to_linear(v), + TransferFunction::Pq => pq_to_linear(v), + TransferFunction::Hlg => hlg_to_linear(v), } } @@ -160,7 +362,13 @@ impl TransferFunction { TransferFunction::Rec709 => rec709_from_linear(v), TransferFunction::Gamma2p2 => gamma2p2_from_linear(v), TransferFunction::Gamma2p8 => gamma2p8_from_linear(v), - TransferFunction::Smpte428 => smpte428_to_linear(v), + TransferFunction::Smpte428 => smpte428_from_linear(v), + TransferFunction::Log100 => log100_from_linear(v), + TransferFunction::Log100Sqrt10 => log100_sqrt10_from_linear(v), + TransferFunction::Bt1361 => bt1361_from_linear(v), + TransferFunction::Smpte240 => smpte240_from_linear(v), + TransferFunction::Pq => pq_from_linear(v), + TransferFunction::Hlg => hlg_from_linear(v), } } } diff --git a/src/image_to_jzazbz.rs b/src/image_to_jzazbz.rs index dd7df02..315dc45 100644 --- a/src/image_to_jzazbz.rs +++ b/src/image_to_jzazbz.rs @@ -9,12 +9,11 @@ use crate::image::ImageConfiguration; use crate::neon::neon_image_to_jzazbz; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_image_to_jzazbz; -use crate::{Jzazbz, Jzczhz, Rgb, TransferFunction}; +use crate::{Jzazbz, Jzczhz, Rgb, TransferFunction, Xyz, SRGB_TO_XYZ_D65}; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[repr(u8)] @@ -53,7 +52,7 @@ fn channels_to_jzaz( let channels = image_configuration.get_channels_count(); let mut _wide_row_handle: Option< - unsafe fn(usize, *const u8, usize, u32, *mut f32, usize, f32, TransferFunction) -> usize, + unsafe fn(usize, *const f32, usize, u32, *mut f32, usize, f32) -> usize, > = None; #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] @@ -66,72 +65,88 @@ fn channels_to_jzaz( _wide_row_handle = Some(sse_image_to_jzazbz::); } + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as 
*mut u8, + dst_stride as usize * height as usize, + ) + }; + + let mut lut_table = vec![0f32; 256]; + for i in 0..256 { + lut_table[i] = transfer_function.linearize(i as f32 * (1. / 255.0)); + } + #[cfg(feature = "rayon")] { - let dst_slice_safe_align = unsafe { - slice::from_raw_parts_mut( - dst.as_mut_ptr() as *mut u8, - dst_stride as usize * height as usize, - ) - }; dst_slice_safe_align .par_chunks_exact_mut(dst_stride as usize) .zip(src.par_chunks_exact(src_stride as usize)) .for_each(|(dst, src)| unsafe { let mut _cx = 0usize; - let src_ptr = src.as_ptr(); let dst_ptr = dst.as_mut_ptr() as *mut f32; + let mut linearized_row = vec![0f32; width as usize * channels]; + for (linear_chunk, src_chunk) in linearized_row + .chunks_exact_mut(channels) + .zip(src.chunks_exact(channels)) + { + linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_r_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_g_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_b_channel_offset()] as usize, + ); + if image_configuration.has_alpha() { + linear_chunk[image_configuration.get_a_channel_offset()] = + src_chunk[image_configuration.get_a_channel_offset()] as f32 + * (1. 
/ 255.0); + } + } + if let Some(dispatcher) = _wide_row_handle { _cx = dispatcher( _cx, - src.as_ptr(), + linearized_row.as_ptr(), 0, width, dst.as_mut_ptr() as *mut f32, 0, display_luminance, - transfer_function, ); } for x in _cx..width as usize { let px = x * channels; - let src = src_ptr.add(px); - let r = src - .add(image_configuration.get_r_channel_offset()) - .read_unaligned(); - let g = src - .add(image_configuration.get_g_channel_offset()) - .read_unaligned(); - let b = src - .add(image_configuration.get_b_channel_offset()) - .read_unaligned(); + let src = linearized_row.get_unchecked(px..); + let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); - let rgb = Rgb::::new(r, g, b); + let xyz = Xyz::from_linear_rgb(Rgb::::new(r, g, b), &SRGB_TO_XYZ_D65); let dst_store = dst_ptr.add(px); match target { JzazbzTarget::Jzazbz => { - let jzazbz = Jzazbz::from_rgb_with_luminance( - rgb, - display_luminance, - transfer_function, - ); + let jzazbz = + Jzazbz::from_xyz_with_display_luminance(xyz, display_luminance); dst_store.write_unaligned(jzazbz.jz); dst_store.add(1).write_unaligned(jzazbz.az); dst_store.add(2).write_unaligned(jzazbz.bz); } JzazbzTarget::Jzczhz => { - let jzczhz = Jzczhz::from_rgb_with_luminance( - rgb, - display_luminance, - transfer_function, - ); + let jzczhz = + Jzczhz::from_xyz_with_display_luminance(xyz, display_luminance); dst_store.write_unaligned(jzczhz.jz); dst_store.add(1).write_unaligned(jzczhz.cz); @@ -140,12 +155,8 @@ fn channels_to_jzaz( } if image_configuration.has_alpha() { - let a = src - .add(image_configuration.get_a_channel_offset()) - .read_unaligned(); - let a_lin = a as f32 * (1f32 / 255f32); - - dst_store.add(3).write_unaligned(a_lin); + let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); + dst_store.add(3).write_unaligned(a); } } }); @@ 
-153,92 +164,88 @@ fn channels_to_jzaz( #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; + for (dst, src) in dst_slice_safe_align + .chunks_exact_mut(dst_stride as usize) + .zip(src.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + let dst_ptr = dst.as_mut_ptr() as *mut f32; - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + let mut linearized_row = vec![0f32; width as usize * channels]; + for (linear_chunk, src_chunk) in linearized_row + .chunks_exact_mut(channels) + .zip(src.chunks_exact(channels)) + { + linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_r_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_g_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_b_channel_offset()] as usize, + ); + if image_configuration.has_alpha() { + linear_chunk[image_configuration.get_a_channel_offset()] = + src_chunk[image_configuration.get_a_channel_offset()] as f32 + * (1. 
/ 255.0); + } + } - if let Some(dispatcher) = _wide_row_handle { - unsafe { + if let Some(dispatcher) = _wide_row_handle { _cx = dispatcher( _cx, - src.as_ptr(), - src_offset, + linearized_row.as_ptr(), + 0, width, - dst.as_mut_ptr(), - dst_offset, + dst.as_mut_ptr() as *mut f32, + 0, display_luminance, - transfer_function, ); } - } - for x in _cx..width as usize { - let px = x * channels; - - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - - let dst_store = unsafe { dst_ptr.add(px) }; - - match target { - JzazbzTarget::Jzazbz => { - let jzazbz = Jzazbz::from_rgb_with_luminance( - rgb, - display_luminance, - transfer_function, - ); - unsafe { + for x in _cx..width as usize { + let px = x * channels; + + let src = linearized_row.get_unchecked(px..); + let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); + + let xyz = Xyz::from_linear_rgb(Rgb::::new(r, g, b), &SRGB_TO_XYZ_D65); + + let dst_store = dst_ptr.add(px); + + match target { + JzazbzTarget::Jzazbz => { + let jzazbz = + Jzazbz::from_xyz_with_display_luminance(xyz, display_luminance); + dst_store.write_unaligned(jzazbz.jz); dst_store.add(1).write_unaligned(jzazbz.az); dst_store.add(2).write_unaligned(jzazbz.bz); } - } - JzazbzTarget::Jzczhz => { - let jzczhz = Jzczhz::from_rgb_with_luminance( - rgb, - display_luminance, - transfer_function, - ); - unsafe { + JzazbzTarget::Jzczhz => { + let jzczhz = + Jzczhz::from_xyz_with_display_luminance(xyz, display_luminance); + dst_store.write_unaligned(jzczhz.jz); 
dst_store.add(1).write_unaligned(jzczhz.cz); dst_store.add(2).write_unaligned(jzczhz.hz); } } - } - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) - .read_unaligned() - }; - let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst_store.add(3).write_unaligned(a_lin); + if image_configuration.has_alpha() { + let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); + dst_store.add(3).write_unaligned(a); } } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } } diff --git a/src/image_to_lalphabeta.rs b/src/image_to_lalphabeta.rs index b59e30b..08f1def 100644 --- a/src/image_to_lalphabeta.rs +++ b/src/image_to_lalphabeta.rs @@ -5,12 +5,11 @@ * // license that can be found in the LICENSE file. */ use crate::image::ImageConfiguration; -use crate::{Rgb, TransferFunction}; +use crate::{LAlphaBeta, Rgb, TransferFunction, SRGB_TO_XYZ_D65}; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[inline(always)] @@ -27,50 +26,70 @@ fn channels_to_lalphabeta( let channels = image_configuration.get_channels_count(); + let mut lut_table = vec![0f32; 256]; + for i in 0..256 { + lut_table[i] = transfer_function.linearize(i as f32 * (1. 
/ 255.0)); + } + + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + #[cfg(feature = "rayon")] { - let dst_slice_safe_align = unsafe { - slice::from_raw_parts_mut( - dst.as_mut_ptr() as *mut u8, - dst_stride as usize * height as usize, - ) - }; dst_slice_safe_align .par_chunks_exact_mut(dst_stride as usize) .zip(src.par_chunks_exact(src_stride as usize)) .for_each(|(dst, src)| unsafe { let mut _cx = 0usize; - let src_ptr = src.as_ptr(); + let mut linearized_row = vec![0f32; width as usize * channels]; + for (linear_chunk, src_chunk) in linearized_row + .chunks_exact_mut(channels) + .zip(src.chunks_exact(channels)) + { + linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_r_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_g_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_b_channel_offset()] as usize, + ); + if image_configuration.has_alpha() { + linear_chunk[image_configuration.get_a_channel_offset()] = + src_chunk[image_configuration.get_a_channel_offset()] as f32 + * (1. 
/ 255.0); + } + } + let dst_ptr = dst.as_mut_ptr() as *mut f32; for x in _cx..width as usize { let px = x * channels; - let src = src_ptr.add(px); - let r = src - .add(image_configuration.get_r_channel_offset()) - .read_unaligned(); - let g = src - .add(image_configuration.get_g_channel_offset()) - .read_unaligned(); - let b = src - .add(image_configuration.get_b_channel_offset()) - .read_unaligned(); - - let rgb = Rgb::::new(r, g, b); + let src = linearized_row.get_unchecked(px..); + let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); let dst_store = dst_ptr.add(px); - let lalphabeta = rgb.to_lalphabeta(transfer_function); + let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65); dst_store.write_unaligned(lalphabeta.l); dst_store.add(1).write_unaligned(lalphabeta.alpha); dst_store.add(2).write_unaligned(lalphabeta.beta); if image_configuration.has_alpha() { - let a = src - .add(image_configuration.get_a_channel_offset()) - .read_unaligned(); - let a_lin = a as f32 * (1f32 / 255f32); - dst_store.add(3).write_unaligned(a_lin); + let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); + dst_store.add(3).write_unaligned(a); } } }); @@ -78,55 +97,60 @@ fn channels_to_lalphabeta( #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - for x in _cx..width as usize { - let px = x * channels; - - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - 
.read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - let dst_store = unsafe { dst_ptr.add(px) }; - let lalphabeta = rgb.to_lalphabeta(transfer_function); - unsafe { + for (dst, src) in dst_slice_safe_align + .chunks_exact_mut(dst_stride as usize) + .zip(src.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; + + let mut linearized_row = vec![0f32; width as usize * channels]; + for (linear_chunk, src_chunk) in linearized_row + .chunks_exact_mut(channels) + .zip(src.chunks_exact(channels)) + { + linear_chunk[image_configuration.get_r_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_r_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_g_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_g_channel_offset()] as usize, + ); + linear_chunk[image_configuration.get_b_channel_offset()] = *lut_table + .get_unchecked( + src_chunk[image_configuration.get_b_channel_offset()] as usize, + ); + if image_configuration.has_alpha() { + linear_chunk[image_configuration.get_a_channel_offset()] = + src_chunk[image_configuration.get_a_channel_offset()] as f32 + * (1. 
/ 255.0); + } + } + + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + for x in _cx..width as usize { + let px = x * channels; + + let src = linearized_row.get_unchecked(px..); + let r = *src.get_unchecked(image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); + let dst_store = dst_ptr.add(px); + let lalphabeta = LAlphaBeta::from_linear_rgb(rgb, &SRGB_TO_XYZ_D65); dst_store.write_unaligned(lalphabeta.l); dst_store.add(1).write_unaligned(lalphabeta.alpha); dst_store.add(2).write_unaligned(lalphabeta.beta); - } - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) - .read_unaligned() - }; - let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst_store.add(3).write_unaligned(a_lin); + if image_configuration.has_alpha() { + let a = *src.get_unchecked(image_configuration.get_a_channel_offset()); + dst_store.add(3).write_unaligned(a); } } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } } diff --git a/src/image_to_linear.rs b/src/image_to_linear.rs index 0e9c7bc..0d46c98 100644 --- a/src/image_to_linear.rs +++ b/src/image_to_linear.rs @@ -65,23 +65,20 @@ fn channels_to_linear( let rgb = Rgb::::new(r, g, b); - unsafe { - dst.write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); - dst.add(1) - .write_unaligned(*lut_table.get_unchecked(rgb.g as usize)); - dst.add(2) - .write_unaligned(*lut_table.get_unchecked(rgb.b as usize)); - } + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.g as usize)); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.b as usize)); if USE_ALPHA && 
image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) - .read_unaligned() - }; + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst.add(3).write_unaligned(a_lin); - } + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_lin); } } @@ -123,10 +120,11 @@ fn channels_to_linear( let rgb = Rgb::::new(r, g, b); - dst.write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); - dst.add(1) + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(*lut_table.get_unchecked(rgb.r as usize)); + dst.add(image_configuration.get_g_channel_offset()) .write_unaligned(*lut_table.get_unchecked(rgb.g as usize)); - dst.add(2) + dst.add(image_configuration.get_b_channel_offset()) .write_unaligned(*lut_table.get_unchecked(rgb.b as usize)); if USE_ALPHA && image_configuration.has_alpha() { @@ -134,7 +132,8 @@ fn channels_to_linear( .add(image_configuration.get_a_channel_offset()) .read_unaligned(); let a_lin = a as f32 * (1f32 / 255f32); - dst.add(3).write_unaligned(a_lin); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_lin); } } }); diff --git a/src/image_to_linear_u8.rs b/src/image_to_linear_u8.rs index 84b41ee..4db8e5a 100644 --- a/src/image_to_linear_u8.rs +++ b/src/image_to_linear_u8.rs @@ -45,31 +45,22 @@ fn channels_to_linear( for x in _cx..width as usize { let px = x * channels; - let r = - unsafe { *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()) }; - let g = - unsafe { *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()) }; - let b = - unsafe { *src_row.get_unchecked(px + image_configuration.get_b_channel_offset()) }; + let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()); + let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()); + let b = *src_row.get_unchecked(px + 
image_configuration.get_b_channel_offset()); let rgb = Rgb::::new(r, g, b); - let mut rgb_f32 = rgb.to_rgb_f32(); - rgb_f32 = rgb_f32.linearize(transfer_function); - let rgb = rgb_f32.to_u8(); - - unsafe { - *dst_row.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize); - *dst_row.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize); - *dst_row.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize); - } + + *dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b as usize); if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { - *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()) - }; - unsafe { - *dst_row.get_unchecked_mut(px + 3) = a; - } + let a = *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()); + *dst_row.get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a; } } } @@ -90,14 +81,18 @@ fn channels_to_linear( let rgb = Rgb::::new(r, g, b); - *dst_row.get_unchecked_mut(px) = *lut_table.get_unchecked(rgb.r as usize); - *dst_row.get_unchecked_mut(px + 1) = *lut_table.get_unchecked(rgb.g as usize); - *dst_row.get_unchecked_mut(px + 2) = *lut_table.get_unchecked(rgb.b as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_r_channel_offset()) = + *lut_table.get_unchecked(rgb.r as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_g_channel_offset()) = + *lut_table.get_unchecked(rgb.g as usize); + *dst_row.get_unchecked_mut(px + image_configuration.get_b_channel_offset()) = + *lut_table.get_unchecked(rgb.b as usize); if USE_ALPHA && image_configuration.has_alpha() { let a = *src_row.get_unchecked(px + 
image_configuration.get_a_channel_offset()); - *dst_row.get_unchecked_mut(px + 3) = a; + *dst_row + .get_unchecked_mut(px + image_configuration.get_a_channel_offset()) = a; } } }); diff --git a/src/image_to_xyz_lab.rs b/src/image_to_xyz_lab.rs index 5617d10..a0d210f 100644 --- a/src/image_to_xyz_lab.rs +++ b/src/image_to_xyz_lab.rs @@ -108,10 +108,21 @@ fn channels_to_xyz( src, @@ -257,8 +283,8 @@ pub fn rgba_to_lab_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -275,6 +301,8 @@ pub fn rgba_to_lab_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive LAB(a) data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn bgra_to_lab_with_alpha( src: &[u8], src_stride: u32, @@ -282,6 +310,8 @@ pub fn bgra_to_lab_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Lab as u8 }>( src, @@ -290,8 +320,8 @@ pub fn bgra_to_lab_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -308,6 +338,8 @@ pub fn bgra_to_lab_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive LAB(a) data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn rgba_to_luv_with_alpha( src: &[u8], src_stride: u32, @@ -315,6 +347,8 @@ pub fn rgba_to_luv_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Luv as u8 }>( src, @@ -323,8 +357,8 @@ pub fn rgba_to_luv_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -341,6 +375,8 @@ pub fn rgba_to_luv_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive LAB(a) data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn bgra_to_luv_with_alpha( src: &[u8], src_stride: u32, @@ -348,6 +384,8 @@ pub fn bgra_to_luv_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Luv as u8 }>( src, @@ -356,8 +394,8 @@ pub fn bgra_to_luv_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -370,6 +408,8 @@ pub fn bgra_to_luv_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive XYZ(a) data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn rgba_to_xyz_with_alpha( src: &[u8], src_stride: u32, @@ -377,6 +417,8 @@ pub fn rgba_to_xyz_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Xyz as u8 }>( src, @@ -385,8 +427,8 @@ pub fn rgba_to_xyz_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -399,6 +441,8 @@ pub fn rgba_to_xyz_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive XYZ data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn bgra_to_xyz_with_alpha( src: &[u8], src_stride: u32, @@ -406,6 +450,8 @@ pub fn bgra_to_xyz_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Xyz as u8 }>( src, @@ -414,8 +460,8 @@ pub fn bgra_to_xyz_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -428,6 +474,8 @@ pub fn bgra_to_xyz_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive LCH(a) data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn rgba_to_lch_with_alpha( src: &[u8], src_stride: u32, @@ -435,6 +483,8 @@ pub fn rgba_to_lch_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Lch as u8 }>( src, @@ -443,8 +493,8 @@ pub fn rgba_to_lch_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -457,6 +507,8 @@ pub fn rgba_to_lch_with_alpha( /// * `height` - Image height /// * `dst` - A mutable slice to receive LCH data /// * `dst_stride` - Bytes per row for dst data +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn bgra_to_lch_with_alpha( src: &[u8], src_stride: u32, @@ -464,6 +516,8 @@ pub fn bgra_to_lch_with_alpha( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { channels_to_xyz_with_alpha::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Lch as u8 }>( src, @@ -472,7 +526,7 @@ pub fn bgra_to_lch_with_alpha( dst_stride, width, height, - &SRGB_TO_XYZ_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } diff --git a/src/jzazbz_to_image.rs b/src/jzazbz_to_image.rs index ec6657a..850c038 100644 --- a/src/jzazbz_to_image.rs +++ b/src/jzazbz_to_image.rs @@ -15,7 +15,6 @@ use crate::{Jzazbz, Jzczhz, TransferFunction}; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[allow(clippy::type_complexity)] @@ -33,7 +32,7 @@ fn jzazbz_to_image( let target: JzazbzTarget = TARGET.into(); let mut _wide_row_handle: Option< - unsafe fn(usize, *const f32, u32, *mut u8, u32, u32, f32, 
TransferFunction) -> usize, + unsafe fn(usize, *const f32, u32, *mut f32, u32, u32, f32) -> usize, > = None; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] @@ -46,14 +45,22 @@ fn jzazbz_to_image( _wide_row_handle = Some(neon_jzazbz_to_image::); } + let mut lut_table = vec![0u8; 2049]; + for i in 0..2049 { + lut_table[i] = (transfer_function.gamma(i as f32 * (1. / 2048.0)) * 255.) + .ceil() + .min(255.) as u8; + } + + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + #[cfg(feature = "rayon")] { - let src_slice_safe_align = unsafe { - slice::from_raw_parts( - src.as_ptr() as *const u8, - src_stride as usize * height as usize, - ) - }; dst.par_chunks_exact_mut(dst_stride as usize) .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) .for_each(|(dst, src)| unsafe { @@ -62,18 +69,18 @@ fn jzazbz_to_image( let mut _cx = 0usize; let src_ptr = src.as_ptr() as *mut f32; - let dst_ptr = dst.as_mut_ptr(); + + let mut transient_row = vec![0f32; width as usize * channels]; if let Some(dispatcher) = _wide_row_handle { _cx = dispatcher( _cx, src.as_ptr() as *const f32, 0, - dst.as_mut_ptr(), + transient_row.as_mut_ptr(), 0, width, display_luminance, - transfer_function, ); } @@ -86,26 +93,56 @@ fn jzazbz_to_image( JzazbzTarget::Jzazbz => { let jzazbz = Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); - jzazbz.to_rgb(transfer_function) + jzazbz.to_linear_rgb() } JzazbzTarget::Jzczhz => { let jzczhz = Jzczhz::new(l_x, l_y, l_z); - jzczhz.to_rgb_with_luminance(display_luminance, transfer_function) + jzczhz.to_linear_rgb_with_luminance(display_luminance) } }; - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); + let dst = 
transient_row.get_unchecked_mut((x * channels)..); + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; if image_configuration.has_alpha() { let l_a = src_ptr.add(px + 3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = l_a; + } + } + + for (dst_chunk, src_chunks) in dst + .chunks_exact_mut(channels) + .zip(transient_row.chunks_exact(channels)) + { + let r_cast = (src_chunks[image_configuration.get_r_channel_offset()] + .min(1.) + .max(0.) + * 2048f32) + .round(); + let g_cast = (src_chunks[image_configuration.get_g_channel_offset()] + .min(1.) + .max(0.) + * 2048f32) + .round(); + let b_cast = (src_chunks[image_configuration.get_b_channel_offset()] + .min(1.) + .max(0.) + * 2048f32) + .round(); + + dst_chunk[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(r_cast as usize); + dst_chunk[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(g_cast as usize); + dst_chunk[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(b_cast as usize); + + if image_configuration.has_alpha() { + let a_cast = (src_chunks[image_configuration.get_a_channel_offset()] * 255.) + .min(255.) + .max(0.) 
as u8; + dst_chunk[image_configuration.get_a_channel_offset()] = a_cast; } } }); @@ -113,67 +150,93 @@ fn jzazbz_to_image( #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; + for (dst, src) in dst + .chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.chunks_exact(src_stride as usize)) + { + unsafe { + let channels = image_configuration.get_channels_count(); - let channels = image_configuration.get_channels_count(); + let mut _cx = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + let src_ptr = src.as_ptr() as *mut f32; - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let mut transient_row = vec![0f32; width as usize * channels]; - if let Some(dispatcher) = _wide_row_handle { - unsafe { + if let Some(dispatcher) = _wide_row_handle { _cx = dispatcher( _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, + src.as_ptr() as *const f32, + 0, + transient_row.as_mut_ptr(), + 0, width, display_luminance, - transfer_function, ); } - } - for x in _cx..width as usize { - let px = x * channels; - let l_x = unsafe { src_ptr.add(px).read_unaligned() }; - let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; - let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; - let rgb = match target { - JzazbzTarget::Jzazbz => { - let jzazbz = Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); - jzazbz.to_rgb(transfer_function) - } - JzazbzTarget::Jzczhz => { - let jzczhz = Jzczhz::new(l_x, l_y, l_z); - jzczhz.to_rgb_with_luminance(display_luminance, transfer_function) - } - }; - - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); + 
for x in _cx..width as usize { + let px = x * channels; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let rgb = match target { + JzazbzTarget::Jzazbz => { + let jzazbz = + Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); + jzazbz.to_linear_rgb() + } + JzazbzTarget::Jzczhz => { + let jzczhz = Jzczhz::new(l_x, l_y, l_z); + jzczhz.to_linear_rgb_with_luminance(display_luminance) + } + }; + + let dst = transient_row.get_unchecked_mut((x * channels)..); + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; if image_configuration.has_alpha() { let l_a = src_ptr.add(px + 3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = l_a; } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + for (dst_chunk, src_chunks) in dst + .chunks_exact_mut(channels) + .zip(transient_row.chunks_exact(channels)) + { + let r_cast = (src_chunks[image_configuration.get_r_channel_offset()] + .min(1.) + .max(0.) + * 2048f32) + .round(); + let g_cast = (src_chunks[image_configuration.get_g_channel_offset()] + .min(1.) + .max(0.) + * 2048f32) + .round(); + let b_cast = (src_chunks[image_configuration.get_b_channel_offset()] + .min(1.) + .max(0.) 
+ * 2048f32) + .round(); + + dst_chunk[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(r_cast as usize); + dst_chunk[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(g_cast as usize); + dst_chunk[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(b_cast as usize); + + if image_configuration.has_alpha() { + let a_cast = (src_chunks[image_configuration.get_a_channel_offset()] * 255.) + .min(255.) + .max(0.) as u8; + dst_chunk[image_configuration.get_a_channel_offset()] = a_cast; + } + } + } } } } diff --git a/src/jzczhz.rs b/src/jzczhz.rs index 89bbf37..ce28c9d 100644 --- a/src/jzczhz.rs +++ b/src/jzczhz.rs @@ -104,6 +104,20 @@ impl Jzczhz { jzazbz.to_rgb(transfer_function) } + /// Converts [Jzczhz] to linear [Rgb] + /// + /// # Arguments + /// `display_luminance` - display luminance + /// `transfer_function` - Transfer function to convert into linear colorspace and backwards + #[inline] + pub fn to_linear_rgb_with_luminance( + &self, + display_luminance: f32, + ) -> Rgb { + let jzazbz = self.to_jzazbz_with_luminance(display_luminance); + jzazbz.to_linear_rgb() + } + /// Converts Jzczhz to *Xyz* #[inline] pub fn to_xyz(&self) -> Xyz { @@ -111,13 +125,20 @@ impl Jzczhz { jzazbz.to_xyz() } - /// Converts *Xyz* to *Jzczhz* + /// Converts [Xyz] to [Jzczhz] #[inline] pub fn from_xyz(xyz: Xyz) -> Jzczhz { let jzazbz = Jzazbz::from_xyz(xyz); Jzczhz::from_jzazbz(jzazbz) } + /// Converts [Xyz] to [Jzczhz] + #[inline] + pub fn from_xyz_with_display_luminance(xyz: Xyz, luminance: f32) -> Jzczhz { + let jzazbz = Jzazbz::from_xyz_with_display_luminance(xyz, luminance); + Jzczhz::from_jzazbz(jzazbz) + } + /// Computes distance for *Jzczhz* #[inline] pub fn distance(&self, other: Jzczhz) -> f32 { diff --git a/src/lalphabeta.rs b/src/lalphabeta.rs index eeeeca6..79b8e43 100644 --- a/src/lalphabeta.rs +++ b/src/lalphabeta.rs @@ -30,6 +30,13 @@ impl LAlphaBeta { LAlphaBeta::from_xyz(xyz) } + #[inline] + /// 
Converts linear [Rgb] to [LAlphaBeta] using [Xyz] matrix + pub fn from_linear_rgb(rgb: Rgb, matrix: &[[f32; 3]; 3]) -> LAlphaBeta { + let xyz = Xyz::from_linear_rgb(rgb, matrix); + LAlphaBeta::from_xyz(xyz) + } + /// Converts XYZ to l-alpha-beta #[inline] pub fn from_xyz(xyz: Xyz) -> LAlphaBeta { @@ -71,12 +78,19 @@ impl LAlphaBeta { Xyz::new(x, y, z) } - /// Converts l-alpha-beta to RGB + /// Converts l-alpha-beta to [Rgb] #[inline] pub fn to_rgb(&self, transfer_function: TransferFunction) -> Rgb { let xyz = self.to_xyz(); xyz.to_rgb(&XYZ_TO_SRGB_D65, transfer_function) } + + /// Converts l-alpha-beta to Linear [Rgb] + #[inline] + pub fn to_linear_rgb(&self, matrix: &[[f32; 3]; 3]) -> Rgb { + let xyz = self.to_xyz(); + xyz.to_linear_rgb(matrix) + } } impl Index for LAlphaBeta { diff --git a/src/lalphabeta_to_image.rs b/src/lalphabeta_to_image.rs index 744e15c..9cf6656 100644 --- a/src/lalphabeta_to_image.rs +++ b/src/lalphabeta_to_image.rs @@ -5,12 +5,11 @@ * // license that can be found in the LICENSE file. */ use crate::image::ImageConfiguration; -use crate::{LAlphaBeta, TransferFunction}; +use crate::{LAlphaBeta, Rgb, TransferFunction, XYZ_TO_SRGB_D65}; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; fn lalphabeta_to_image( @@ -26,21 +25,30 @@ fn lalphabeta_to_image( let channels = image_configuration.get_channels_count(); + let mut lut_table = vec![0u8; 2049]; + for i in 0..2049 { + lut_table[i] = (transfer_function.gamma(i as f32 * (1. / 2048.0)) * 255.) + .round() + .min(255.) 
as u8; + } + + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + #[cfg(feature = "rayon")] { - let src_slice_safe_align = unsafe { - slice::from_raw_parts( - src.as_ptr() as *const u8, - src_stride as usize * height as usize, - ) - }; dst.par_chunks_exact_mut(dst_stride as usize) .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) .for_each(|(dst, src)| unsafe { let mut _cx = 0usize; let src_ptr = src.as_ptr() as *mut f32; - let dst_ptr = dst.as_mut_ptr(); + + let mut transient_row = vec![0f32; width as usize * channels]; for x in _cx..width as usize { let px = x * channels; @@ -48,20 +56,45 @@ fn lalphabeta_to_image( let l_y = src_ptr.add(px + 1).read_unaligned(); let l_z = src_ptr.add(px + 2).read_unaligned(); let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); - let rgb = lalphabeta.to_rgb(transfer_function); + let rgb = lalphabeta.to_linear_rgb(&XYZ_TO_SRGB_D65); - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); + let dst = transient_row.get_unchecked_mut((x * channels)..); + *dst.get_unchecked_mut(image_configuration.get_r_channel_offset()) = rgb.r; + *dst.get_unchecked_mut(image_configuration.get_g_channel_offset()) = rgb.g; + *dst.get_unchecked_mut(image_configuration.get_b_channel_offset()) = rgb.b; if image_configuration.has_alpha() { let l_a = src_ptr.add(px + 3).read_unaligned(); let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + *dst.get_unchecked_mut(image_configuration.get_a_channel_offset()) = + a_value; + } + } + + for (dst, src) in dst + .chunks_exact_mut(channels) + .zip(transient_row.chunks_exact(channels)) + { + let r = 
src[image_configuration.get_r_channel_offset()]; + let g = src[image_configuration.get_g_channel_offset()]; + let b = src[image_configuration.get_b_channel_offset()]; + + let rgb = (Rgb::::new( + r.min(1f32).max(0f32), + g.min(1f32).max(0f32), + b.min(1f32).max(0f32), + ) * Rgb::::dup(2048f32)) + .round() + .cast::(); + + dst[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(rgb.r.min(2048) as usize); + dst[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(rgb.g.min(2048) as usize); + dst[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(rgb.b.min(2048) as usize); + if image_configuration.has_alpha() { + dst[image_configuration.get_a_channel_offset()] = + src[image_configuration.get_a_channel_offset()] as u8; } } }); diff --git a/src/linear_to_planar.rs b/src/linear_to_planar.rs index 90ebe1d..8de2d91 100644 --- a/src/linear_to_planar.rs +++ b/src/linear_to_planar.rs @@ -5,16 +5,11 @@ * // license that can be found in the LICENSE file. 
*/ -#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] -use crate::neon::linear_to_planar::neon_linear_plane_to_gamma; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -use crate::sse::sse_linear_plane_to_gamma; use crate::TransferFunction; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[allow(clippy::type_complexity)] @@ -27,103 +22,68 @@ fn linear_to_gamma_channels( height: u32, transfer_function: TransferFunction, ) { - let mut _wide_row_handler: Option< - unsafe fn(usize, *const f32, u32, *mut u8, u32, u32, TransferFunction) -> usize, - > = None; - - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - { - _wide_row_handler = Some(neon_linear_plane_to_gamma); + let mut lut_table = vec![0u8; 2049]; + for i in 0..2049 { + lut_table[i] = (transfer_function.gamma(i as f32 * (1. / 2048.0)) * 255.) + .ceil() + .min(255.) 
as u8; } - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - if std::arch::is_x86_feature_detected!("sse4.1") { - _wide_row_handler = Some(sse_linear_plane_to_gamma); - } + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; #[cfg(feature = "rayon")] { - let src_slice_safe_align = unsafe { - slice::from_raw_parts( - src.as_ptr() as *const u8, - src_stride as usize * height as usize, - ) - }; dst.par_chunks_exact_mut(dst_stride as usize) .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) .for_each(|(dst, src)| unsafe { let mut _cx = 0usize; - if let Some(dispatcher) = _wide_row_handler { - _cx = dispatcher( - _cx, - src.as_ptr() as *const f32, - 0, - dst.as_mut_ptr(), - 0, - width, - transfer_function, - ); - } - let src_ptr = src.as_ptr() as *const f32; let dst_ptr = dst.as_mut_ptr(); for x in _cx..width as usize { let px = x; let src_slice = src_ptr.add(px); - let pixel = src_slice.read_unaligned().min(1f32).max(0f32); + let pixel = + (src_slice.read_unaligned().min(1f32).max(0f32) * 2048f32).round() as usize; let dst = dst_ptr.add(px); - let transferred = transfer_function.gamma(pixel); - let rgb8 = (transferred * 255f32).min(255f32).max(0f32) as u8; + let transferred = *lut_table.get_unchecked(pixel.min(2048)); - dst.write_unaligned(rgb8); + dst.write_unaligned(transferred); } }); } #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, - width, - transfer_function, - ); - } - } + for (dst, src) in dst + .chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; - let src_ptr = unsafe { (src.as_ptr() as *const 
u8).add(src_offset) as *const f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let src_ptr = src.as_ptr() as *const f32; + let dst_ptr = dst.as_mut_ptr(); - for x in _cx..width as usize { - let px = x; - let src_slice = unsafe { src_ptr.add(px) }; - let pixel = unsafe { src_slice.read_unaligned() }.min(1f32).max(0f32); + for x in _cx..width as usize { + let px = x; + let src_slice = src_ptr.add(px); + let pixel = + (src_slice.read_unaligned().min(1f32).max(0f32) * 2048f32).round() as usize; - let dst = unsafe { dst_ptr.add(px) }; - let transferred = transfer_function.gamma(pixel); - let rgb8 = (transferred * 255f32).min(255f32).max(0f32) as u8; + let dst = dst_ptr.add(px); + let transferred = *lut_table.get_unchecked(pixel.min(2048)); - unsafe { - dst.write_unaligned(rgb8); + dst.write_unaligned(transferred); } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } } diff --git a/src/neon/gamma_curves.rs b/src/neon/gamma_curves.rs index f5b7833..c530a23 100644 --- a/src/neon/gamma_curves.rs +++ b/src/neon/gamma_curves.rs @@ -4,8 +4,7 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. 
*/ - -use crate::gamma_curves::TransferFunction; +#![allow(dead_code)] use crate::neon::math::vpowq_n_f32; use std::arch::aarch64::*; @@ -131,31 +130,3 @@ pub unsafe fn neon_gamma2p2_from_linear(linear: float32x4_t) -> float32x4_t { pub unsafe fn neon_gamma2p8_from_linear(linear: float32x4_t) -> float32x4_t { neon_pure_gamma_function(linear, 1f32 / 2.8f32) } - -#[inline(always)] -pub unsafe fn neon_perform_linear_transfer( - transfer_function: TransferFunction, - v: float32x4_t, -) -> float32x4_t { - match transfer_function { - TransferFunction::Srgb => neon_srgb_to_linear(v), - TransferFunction::Rec709 => neon_rec709_to_linear(v), - TransferFunction::Gamma2p2 => neon_gamma2p2_to_linear(v), - TransferFunction::Gamma2p8 => neon_gamma2p8_to_linear(v), - TransferFunction::Smpte428 => neon_smpte428_to_linear(v), - } -} - -#[inline(always)] -pub unsafe fn neon_perform_gamma_transfer( - transfer_function: TransferFunction, - v: float32x4_t, -) -> float32x4_t { - match transfer_function { - TransferFunction::Srgb => neon_srgb_from_linear(v), - TransferFunction::Rec709 => neon_rec709_from_linear(v), - TransferFunction::Gamma2p2 => neon_gamma2p2_from_linear(v), - TransferFunction::Gamma2p8 => neon_gamma2p8_from_linear(v), - TransferFunction::Smpte428 => neon_smpte428_from_linear(v), - } -} diff --git a/src/neon/image_to_jzazbz.rs b/src/neon/image_to_jzazbz.rs index beedf79..42a4623 100644 --- a/src/neon/image_to_jzazbz.rs +++ b/src/neon/image_to_jzazbz.rs @@ -7,11 +7,7 @@ use crate::image::ImageConfiguration; use crate::image_to_jzazbz::JzazbzTarget; use crate::neon::math::{vcolorq_matrix_f32, vpowq_n_f32}; -use crate::neon::neon_perform_linear_transfer; -use crate::{ - load_u8_and_deinterleave, load_u8_and_deinterleave_half, load_u8_and_deinterleave_quarter, - TransferFunction, SRGB_TO_XYZ_D65, -}; +use crate::{load_f32_and_deinterleave, SRGB_TO_XYZ_D65}; use erydanos::{vatan2q_f32, vhypotq_fast_f32, visnanq_f32, vmlafq_f32, vpowq_f32}; use std::arch::aarch64::*; @@ -32,15 
+28,8 @@ macro_rules! perceptual_quantizer { } macro_rules! triple_to_jzazbz { - ($r: expr, $g: expr, $b: expr, $transfer: expr, $target: expr, $luminance: expr + ($r: expr, $g: expr, $b: expr, $target: expr, $luminance: expr ) => {{ - let r_f = vmulq_n_f32(vcvtq_f32_u32($r), 1f32 / 255f32); - let g_f = vmulq_n_f32(vcvtq_f32_u32($g), 1f32 / 255f32); - let b_f = vmulq_n_f32(vcvtq_f32_u32($b), 1f32 / 255f32); - let dl_l = neon_perform_linear_transfer($transfer, r_f); - let dl_m = neon_perform_linear_transfer($transfer, g_f); - let dl_s = neon_perform_linear_transfer($transfer, b_f); - let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)), vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)), @@ -53,7 +42,7 @@ macro_rules! triple_to_jzazbz { vdupq_n_f32(*SRGB_TO_XYZ_D65.get_unchecked(2).get_unchecked(2)), ); - let (mut x, mut y, mut z) = vcolorq_matrix_f32(dl_l, dl_m, dl_s, x0, x1, x2, x3, x4, x5, x6, x7, x8); + let (mut x, mut y, mut z) = vcolorq_matrix_f32($r, $g, $b, x0, x1, x2, x3, x4, x5, x6, x7, x8); x = vmulq_n_f32(x, $luminance); y = vmulq_n_f32(y, $luminance); @@ -111,13 +100,12 @@ macro_rules! triple_to_jzazbz { #[inline(always)] pub unsafe fn neon_image_to_jzazbz( start_cx: usize, - src: *const u8, + src: *const f32, src_offset: usize, width: u32, dst: *mut f32, dst_offset: usize, display_luminance: f32, - transfer_function: TransferFunction, ) -> usize { let target: JzazbzTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); @@ -126,206 +114,16 @@ pub unsafe fn neon_image_to_jzazbz {{ @@ -37,14 +36,11 @@ macro_rules! 
perceptual_quantizer_inverse { #[inline(always)] unsafe fn neon_jzazbz_gamma_vld( src: *const f32, - transfer_function: TransferFunction, target: JzazbzTarget, luminance: f32, -) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) { - let v_scale_alpha = vdupq_n_f32(255f32); +) -> (float32x4_t, float32x4_t, float32x4_t, float32x4_t) { let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let (jz, mut az, mut bz, mut a_f32) = - load_f32_and_deinterleave_direct!(src, image_configuration); + let (jz, mut az, mut bz, a_f32) = load_f32_and_deinterleave_direct!(src, image_configuration); if target == JzazbzTarget::Jzczhz { let cz = az; @@ -111,32 +107,17 @@ unsafe fn neon_jzazbz_gamma_vld( let (r_l, g_l, b_l) = vcolorq_matrix_f32(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); - let mut r_f32 = neon_perform_gamma_transfer(transfer_function, r_l); - let mut g_f32 = neon_perform_gamma_transfer(transfer_function, g_l); - let mut b_f32 = neon_perform_gamma_transfer(transfer_function, b_l); - r_f32 = vmulq_f32(r_f32, v_scale_alpha); - g_f32 = vmulq_f32(g_f32, v_scale_alpha); - b_f32 = vmulq_f32(b_f32, v_scale_alpha); - if image_configuration.has_alpha() { - a_f32 = vmulq_f32(a_f32, v_scale_alpha); - } - ( - vcvtaq_u32_f32(r_f32), - vcvtaq_u32_f32(g_f32), - vcvtaq_u32_f32(b_f32), - vcvtaq_u32_f32(a_f32), - ) + (r_l, g_l, b_l, a_f32) } pub unsafe fn neon_jzazbz_to_image( start_cx: usize, src: *const f32, src_offset: u32, - dst: *mut u8, + dst: *mut f32, dst_offset: u32, width: u32, display_luminance: f32, - transfer_function: TransferFunction, ) -> usize { let target: JzazbzTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); @@ -145,198 +126,37 @@ pub unsafe fn neon_jzazbz_to_image( - src_ptr_0, - transfer_function, - target, - luminance_scale, - ); - - let src_ptr_1 = offset_src_ptr.add(4 * channels); - - let (r_row1_, g_row1_, b_row1_, a_row1_) = neon_jzazbz_gamma_vld::( - src_ptr_1, - transfer_function, 
- target, - luminance_scale, - ); - - let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels); - - let (r_row2_, g_row2_, b_row2_, a_row2_) = neon_jzazbz_gamma_vld::( - src_ptr_2, - transfer_function, - target, - luminance_scale, - ); - - let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels); - - let (r_row3_, g_row3_, b_row3_, a_row3_) = neon_jzazbz_gamma_vld::( - src_ptr_3, - transfer_function, - target, - luminance_scale, - ); - - let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_)); - let g_row01 = vcombine_u16(vqmovn_u32(g_row0_), vqmovn_u32(g_row1_)); - let b_row01 = vcombine_u16(vqmovn_u32(b_row0_), vqmovn_u32(b_row1_)); - - let r_row23 = vcombine_u16(vqmovn_u32(r_row2_), vqmovn_u32(r_row3_)); - let g_row23 = vcombine_u16(vqmovn_u32(g_row2_), vqmovn_u32(g_row3_)); - let b_row23 = vcombine_u16(vqmovn_u32(b_row2_), vqmovn_u32(b_row3_)); - - let r_row = vcombine_u8(vqmovn_u16(r_row01), vqmovn_u16(r_row23)); - let g_row = vcombine_u8(vqmovn_u16(g_row01), vqmovn_u16(g_row23)); - let b_row = vcombine_u8(vqmovn_u16(b_row01), vqmovn_u16(b_row23)); - - let dst_ptr = dst.add(dst_offset as usize + cx * channels); - - if image_configuration.has_alpha() { - let a_row01 = vcombine_u16(vqmovn_u32(a_row0_), vqmovn_u32(a_row1_)); - let a_row23 = vcombine_u16(vqmovn_u32(a_row2_), vqmovn_u32(a_row3_)); - let a_row = vcombine_u8(vqmovn_u16(a_row01), vqmovn_u16(a_row23)); - let store_rows = match image_configuration { - ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x16x4_t(r_row, g_row, b_row, a_row) - } - ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x16x4_t(b_row, g_row, r_row, a_row) - } - }; - vst4q_u8(dst_ptr, store_rows); - } else { - let store_rows = match image_configuration { - ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x16x3_t(r_row, g_row, b_row) - } - ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x16x3_t(b_row, g_row, r_row) - } - }; - vst3q_u8(dst_ptr, store_rows); - } - - cx += 16; - 
} - - while cx + 8 < width as usize { - let offset_src_ptr = - ((src as *const u8).add(src_offset as usize) as *const f32).add(cx * channels); - - let src_ptr_0 = offset_src_ptr; - - let (r_row0_, g_row0_, b_row0_, a_row0_) = neon_jzazbz_gamma_vld::( - src_ptr_0, - transfer_function, - target, - luminance_scale, - ); - - let src_ptr_1 = offset_src_ptr.add(4 * channels); - - let (r_row1_, g_row1_, b_row1_, a_row1_) = neon_jzazbz_gamma_vld::( - src_ptr_1, - transfer_function, - target, - luminance_scale, - ); - - let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_)); - let g_row01 = vcombine_u16(vqmovn_u32(g_row0_), vqmovn_u32(g_row1_)); - let b_row01 = vcombine_u16(vqmovn_u32(b_row0_), vqmovn_u32(b_row1_)); - - let r_row = vqmovn_u16(r_row01); - let g_row = vqmovn_u16(g_row01); - let b_row = vqmovn_u16(b_row01); - - let dst_ptr = dst.add(dst_offset as usize + cx * channels); - - if image_configuration.has_alpha() { - let a_row01 = vcombine_u16(vqmovn_u32(a_row0_), vqmovn_u32(a_row1_)); - let a_row = vqmovn_u16(a_row01); - let store_rows = match image_configuration { - ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x8x4_t(r_row, g_row, b_row, a_row) - } - ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x8x4_t(b_row, g_row, r_row, a_row) - } - }; - vst4_u8(dst_ptr, store_rows); - } else { - let store_rows = match image_configuration { - ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x8x3_t(r_row, g_row, b_row) - } - ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x8x3_t(b_row, g_row, r_row) - } - }; - vst3_u8(dst_ptr, store_rows); - } - - cx += 8; - } - while cx + 4 < width as usize { let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx * channels); let src_ptr_0 = offset_src_ptr; - let (r_row0_, g_row0_, b_row0_, a_row0_) = neon_jzazbz_gamma_vld::( - src_ptr_0, - transfer_function, - target, - luminance_scale, - ); - - let zeros = vdup_n_u16(0); - - let 
r_row01 = vcombine_u16(vqmovn_u32(r_row0_), zeros); - let g_row01 = vcombine_u16(vqmovn_u32(g_row0_), zeros); - let b_row01 = vcombine_u16(vqmovn_u32(b_row0_), zeros); - - let r_row = vqmovn_u16(r_row01); - let g_row = vqmovn_u16(g_row01); - let b_row = vqmovn_u16(b_row01); + let (r_row0_, g_row0_, b_row0_, a_row0_) = + neon_jzazbz_gamma_vld::(src_ptr_0, target, luminance_scale); - let dst_ptr = dst.add(dst_offset as usize + cx * channels); + let dst_ptr = ((dst as *mut u8).add(dst_offset as usize) as *mut f32).add(cx * channels); if image_configuration.has_alpha() { - let a_row01 = vcombine_u16(vqmovn_u32(a_row0_), zeros); - let a_row = vqmovn_u16(a_row01); let store_rows = match image_configuration { ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x8x4_t(r_row, g_row, b_row, a_row) + float32x4x4_t(r_row0_, g_row0_, b_row0_, a_row0_) } ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x8x4_t(b_row, g_row, r_row, a_row) + float32x4x4_t(b_row0_, g_row0_, r_row0_, a_row0_) } }; - let mut transient: [u8; 32] = [0; 32]; - vst4_u8(transient.as_mut_ptr(), store_rows); - std::ptr::copy_nonoverlapping(transient.as_ptr(), dst_ptr, 4 * 4); + vst4q_f32(dst_ptr, store_rows); } else { let store_rows = match image_configuration { ImageConfiguration::Rgb | ImageConfiguration::Rgba => { - uint8x8x3_t(r_row, g_row, b_row) + float32x4x3_t(r_row0_, g_row0_, b_row0_) } ImageConfiguration::Bgra | ImageConfiguration::Bgr => { - uint8x8x3_t(b_row, g_row, r_row) + float32x4x3_t(b_row0_, g_row0_, r_row0_) } }; - let mut transient: [u8; 24] = [0; 24]; - vst3_u8(transient.as_mut_ptr(), store_rows); - std::ptr::copy_nonoverlapping(transient.as_ptr(), dst_ptr, 4 * 3); + vst3q_f32(dst_ptr, store_rows); } cx += 4; diff --git a/src/neon/linear_to_planar.rs b/src/neon/linear_to_planar.rs deleted file mode 100644 index cf09d8e..0000000 --- a/src/neon/linear_to_planar.rs +++ /dev/null @@ -1,78 +0,0 @@ -/* - * // Copyright 2024 (c) the Radzivon Bartoshyk. 
All rights reserved. - * // - * // Use of this source code is governed by a BSD-style - * // license that can be found in the LICENSE file. - */ - -use crate::neon::neon_perform_gamma_transfer; -use crate::TransferFunction; -use std::arch::aarch64::*; - -#[inline(always)] -unsafe fn transfer_to_gamma(r: float32x4_t, transfer_function: TransferFunction) -> uint32x4_t { - vcvtaq_u32_f32(vmulq_n_f32( - neon_perform_gamma_transfer(transfer_function, r), - 255f32, - )) -} - -#[inline(always)] -unsafe fn process_set(k: float32x4x4_t, transfer_function: TransferFunction) -> uint8x16_t { - let y0 = transfer_to_gamma(k.0, transfer_function); - let y1 = transfer_to_gamma(k.1, transfer_function); - let y2 = transfer_to_gamma(k.2, transfer_function); - let y3 = transfer_to_gamma(k.3, transfer_function); - - let y_row01 = vcombine_u16(vqmovn_u32(y0), vqmovn_u32(y1)); - let y_row23 = vcombine_u16(vqmovn_u32(y2), vqmovn_u32(y3)); - - vcombine_u8(vqmovn_u16(y_row01), vqmovn_u16(y_row23)) -} - -#[inline] -pub unsafe fn neon_linear_plane_to_gamma( - start_cx: usize, - src: *const f32, - src_offset: u32, - dst: *mut u8, - dst_offset: u32, - width: u32, - transfer_function: TransferFunction, -) -> usize { - let mut cx = start_cx; - - while cx + 64 < width as usize { - let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); - - let pixel_row0 = vld1q_f32_x4(offset_src_ptr); - let pixel_row1 = vld1q_f32_x4(offset_src_ptr.add(16)); - let pixel_row2 = vld1q_f32_x4(offset_src_ptr.add(32)); - let pixel_row3 = vld1q_f32_x4(offset_src_ptr.add(48)); - - let set0 = process_set(pixel_row0, transfer_function); - let set1 = process_set(pixel_row1, transfer_function); - let set2 = process_set(pixel_row2, transfer_function); - let set3 = process_set(pixel_row3, transfer_function); - - let dst_ptr = dst.add(dst_offset as usize + cx); - - let pixel_set = uint8x16x4_t(set0, set1, set2, set3); - vst1q_u8_x4(dst_ptr, pixel_set); - - cx += 64; - } - - while cx + 16 < 
width as usize { - let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); - - let pixel_row = vld1q_f32_x4(offset_src_ptr); - let r_row = process_set(pixel_row, transfer_function); - let dst_ptr = dst.add(dst_offset as usize + cx); - vst1q_u8(dst_ptr, r_row); - - cx += 16; - } - - cx -} diff --git a/src/neon/mod.rs b/src/neon/mod.rs index 22c8d9d..89eecc9 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -14,10 +14,8 @@ mod image_to_hsv; mod image_to_jzazbz; mod image_to_oklab; mod jzazbz_to_image; -pub mod linear_to_planar; mod math; mod oklab_to_image; -pub mod planar_to_linear; mod routines; mod sigmoidal; mod to_sigmoidal; @@ -28,7 +26,6 @@ mod xyza_laba_to_image; pub use colors::*; pub use from_sigmoidal::neon_from_sigmoidal_row; -pub use gamma_curves::*; pub use hsv_to_image::*; pub use image_to_hsv::*; pub use image_to_jzazbz::neon_image_to_jzazbz; diff --git a/src/neon/planar_to_linear.rs b/src/neon/planar_to_linear.rs deleted file mode 100644 index 490fdc3..0000000 --- a/src/neon/planar_to_linear.rs +++ /dev/null @@ -1,84 +0,0 @@ -/* - * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. - * // - * // Use of this source code is governed by a BSD-style - * // license that can be found in the LICENSE file. 
- */ - -use crate::gamma_curves::TransferFunction; -use crate::neon::*; -use std::arch::aarch64::*; - -#[inline(always)] -unsafe fn neon_to_linear(r: uint32x4_t, transfer_function: TransferFunction) -> float32x4_t { - let r_f = vmulq_n_f32(vcvtq_f32_u32(r), 1f32 / 255f32); - neon_perform_linear_transfer(transfer_function, r_f) -} - -#[inline] -unsafe fn process_pixels(pixels: uint8x16_t, transfer_function: TransferFunction) -> float32x4x4_t { - let r_low = vmovl_u8(vget_low_u8(pixels)); - - let r_low_low = vmovl_u16(vget_low_u16(r_low)); - - let x_low_low = neon_to_linear(r_low_low, transfer_function); - - let r_low_high = vmovl_high_u16(r_low); - - let x_low_high = neon_to_linear(r_low_high, transfer_function); - - let r_high = vmovl_high_u8(pixels); - - let r_high_low = vmovl_u16(vget_low_u16(r_high)); - - let x_high_low = neon_to_linear(r_high_low, transfer_function); - - let r_high_high = vmovl_high_u16(r_high); - - let x_high_high = neon_to_linear(r_high_high, transfer_function); - float32x4x4_t(x_low_low, x_low_high, x_high_low, x_high_high) -} - -#[inline(always)] -pub unsafe fn neon_plane_to_linear( - start_cx: usize, - src: *const u8, - src_offset: usize, - width: u32, - dst: *mut f32, - dst_offset: usize, - transfer_function: TransferFunction, -) -> usize { - let mut cx = start_cx; - - let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32; - - while cx + 64 < width as usize { - let src_ptr = src.add(src_offset + cx); - let pixels_row64 = vld1q_u8_x4(src_ptr); - let storing_row0 = process_pixels(pixels_row64.0, transfer_function); - vst1q_f32_x4(dst_ptr.add(cx), storing_row0); - - let storing_row1 = process_pixels(pixels_row64.1, transfer_function); - vst1q_f32_x4(dst_ptr.add(cx + 16), storing_row1); - - let storing_row2 = process_pixels(pixels_row64.2, transfer_function); - vst1q_f32_x4(dst_ptr.add(cx + 32), storing_row2); - - let storing_row3 = process_pixels(pixels_row64.3, transfer_function); - vst1q_f32_x4(dst_ptr.add(cx + 48), storing_row3); - - 
cx += 64; - } - - while cx + 16 < width as usize { - let src_ptr = src.add(src_offset + cx); - let pixels = vld1q_u8(src_ptr); - let storing_row = process_pixels(pixels, transfer_function); - vst1q_f32_x4(dst_ptr.add(cx), storing_row); - - cx += 16; - } - - cx -} diff --git a/src/oklab_to_image.rs b/src/oklab_to_image.rs index 35377dd..30e1210 100644 --- a/src/oklab_to_image.rs +++ b/src/oklab_to_image.rs @@ -114,16 +114,22 @@ fn oklab_to_image( .chunks_exact_mut(channels) .zip(transient_row.chunks_exact_mut(channels)) { - let rgb = (Rgb::::new(src_chunks[0], src_chunks[1], src_chunks[2]) - * Rgb::::dup(2048f32)) + let rgb = (Rgb::::new( + src_chunks[image_configuration.get_r_channel_offset()], + src_chunks[image_configuration.get_g_channel_offset()], + src_chunks[image_configuration.get_b_channel_offset()], + ) * Rgb::::dup(2048f32)) .cast::(); - dst_chunks[0] = *lut_table.get_unchecked(rgb.r as usize); - dst_chunks[1] = *lut_table.get_unchecked(rgb.g as usize); - dst_chunks[2] = *lut_table.get_unchecked(rgb.b as usize); + dst_chunks[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(rgb.r as usize); + dst_chunks[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(rgb.g as usize); + dst_chunks[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(rgb.b as usize); if image_configuration.has_alpha() { let a_lin = (src_chunks[4] * 255f32).round() as u8; - dst_chunks[0] = a_lin; + dst_chunks[image_configuration.get_a_channel_offset()] = a_lin; } } }); @@ -131,8 +137,10 @@ fn oklab_to_image( #[cfg(not(feature = "rayon"))] { - for (dst, src) in dst.chunks_exact_mut(dst_stride as usize) - .zip(src_slice_safe_align.chunks_exact(src_stride as usize)) { + for (dst, src) in dst + .chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.chunks_exact(src_stride as usize)) + { unsafe { let mut _cx = 0usize; @@ -175,16 +183,22 @@ fn oklab_to_image( .chunks_exact_mut(channels) 
.zip(transient_row.chunks_exact_mut(channels)) { - let rgb = (Rgb::::new(src_chunks[0], src_chunks[1], src_chunks[2]) - * Rgb::::dup(2048f32)) - .cast::(); + let rgb = (Rgb::::new( + src_chunks[image_configuration.get_r_channel_offset()], + src_chunks[image_configuration.get_g_channel_offset()], + src_chunks[image_configuration.get_b_channel_offset()], + ) * Rgb::::dup(2048f32)) + .cast::(); - dst_chunks[0] = *lut_table.get_unchecked(rgb.r as usize); - dst_chunks[1] = *lut_table.get_unchecked(rgb.g as usize); - dst_chunks[2] = *lut_table.get_unchecked(rgb.b as usize); + dst_chunks[image_configuration.get_r_channel_offset()] = + *lut_table.get_unchecked(rgb.r as usize); + dst_chunks[image_configuration.get_g_channel_offset()] = + *lut_table.get_unchecked(rgb.g as usize); + dst_chunks[image_configuration.get_b_channel_offset()] = + *lut_table.get_unchecked(rgb.b as usize); if image_configuration.has_alpha() { let a_lin = (src_chunks[4] * 255f32).round() as u8; - dst_chunks[0] = a_lin; + dst_chunks[image_configuration.get_a_channel_offset()] = a_lin; } } } diff --git a/src/planar_to_linear.rs b/src/planar_to_linear.rs index fe888e0..51552d6 100644 --- a/src/planar_to_linear.rs +++ b/src/planar_to_linear.rs @@ -5,16 +5,11 @@ * // license that can be found in the LICENSE file. 
*/ -#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] -use crate::neon::planar_to_linear::neon_plane_to_linear; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -use crate::sse::sse_plane_to_linear; use crate::TransferFunction; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -#[cfg(feature = "rayon")] use std::slice; #[inline(always)] @@ -28,28 +23,20 @@ fn channels_to_linear( height: u32, transfer_function: TransferFunction, ) { - let mut _wide_row_handler: Option< - unsafe fn(usize, *const u8, usize, u32, *mut f32, usize, TransferFunction) -> usize, - > = None; - - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - if std::arch::is_x86_feature_detected!("sse4.1") { - _wide_row_handler = Some(sse_plane_to_linear); - } - - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - { - _wide_row_handler = Some(neon_plane_to_linear); + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + + let mut lut_table = vec![0f32; 256]; + for i in 0..256 { + lut_table[i] = transfer_function.linearize(i as f32 * (1. / 255.0)); } #[cfg(feature = "rayon")] { - let dst_slice_safe_align = unsafe { - slice::from_raw_parts_mut( - dst.as_mut_ptr() as *mut u8, - dst_stride as usize * height as usize, - ) - }; dst_slice_safe_align .par_chunks_exact_mut(dst_stride as usize) .zip(src.par_chunks_exact(src_stride as usize)) @@ -59,16 +46,11 @@ fn channels_to_linear( let src_ptr = src.as_ptr(); let dst_ptr = dst.as_mut_ptr() as *mut f32; - if let Some(dispatcher) = _wide_row_handler { - _cx = dispatcher(_cx, src_ptr, 0, width, dst_ptr, 0, transfer_function); - } - for x in _cx..width as usize { let px = x; let dst = dst_ptr.add(px); let src = src_ptr.add(px); - let pixel_f = src.read_unaligned() as f32 * (1. 
/ 255.); - let transferred = transfer_function.linearize(pixel_f); + let transferred = *lut_table.get_unchecked(src.read_unaligned() as usize); dst.write_unaligned(transferred); } @@ -77,43 +59,25 @@ fn channels_to_linear( #[cfg(not(feature = "rayon"))] { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + for (dst, src) in dst_slice_safe_align + .chunks_exact_mut(dst_stride as usize) + .zip(src.chunks_exact(src_stride as usize)) + { + unsafe { + let mut _cx = 0usize; - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - transfer_function, - ); - } - } + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; - for x in _cx..width as usize { - let px = x; - let dst = unsafe { dst_ptr.add(px) }; - let src = unsafe { src_ptr.add(px) }; - let pixel_f = unsafe { src.read_unaligned() as f32 } * (1. / 255.); - let transferred = transfer_function.linearize(pixel_f); + for x in _cx..width as usize { + let px = x; + let dst = dst_ptr.add(px); + let src = src_ptr.add(px); + let transferred = *lut_table.get_unchecked(src.read_unaligned()); - unsafe { dst.write_unaligned(transferred); } } - - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; } } } diff --git a/src/sse/gamma_curves.rs b/src/sse/gamma_curves.rs index ee6ea00..c5f68e7 100644 --- a/src/sse/gamma_curves.rs +++ b/src/sse/gamma_curves.rs @@ -4,7 +4,7 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. 
*/ - +#![allow(dead_code)] use crate::gamma_curves::TransferFunction; use crate::sse::*; use erydanos::_mm_pow_ps; @@ -143,29 +143,4 @@ pub unsafe fn sse_gamma2p2_from_linear(linear: __m128) -> __m128 { #[inline(always)] pub unsafe fn sse_gamma2p8_from_linear(linear: __m128) -> __m128 { sse_pure_gamma(linear, 1f32 / 2.8f32) -} - -#[inline(always)] -pub unsafe fn perform_sse_linear_transfer( - transfer_function: TransferFunction, - v: __m128, -) -> __m128 { - match transfer_function { - TransferFunction::Srgb => sse_srgb_to_linear(v), - TransferFunction::Rec709 => sse_rec709_to_linear(v), - TransferFunction::Gamma2p2 => sse_gamma2p2_to_linear(v), - TransferFunction::Gamma2p8 => sse_gamma2p8_to_linear(v), - TransferFunction::Smpte428 => sse_smpte428_to_linear(v), - } -} - -#[inline(always)] -pub unsafe fn perform_sse_gamma_transfer(transfer_function: TransferFunction, v: __m128) -> __m128 { - match transfer_function { - TransferFunction::Srgb => sse_srgb_from_linear(v), - TransferFunction::Rec709 => sse_rec709_from_linear(v), - TransferFunction::Gamma2p2 => sse_gamma2p2_from_linear(v), - TransferFunction::Gamma2p8 => sse_gamma2p8_from_linear(v), - TransferFunction::Smpte428 => sse_smpte428_from_linear(v), - } -} +} \ No newline at end of file diff --git a/src/sse/image_to_jzazbz.rs b/src/sse/image_to_jzazbz.rs index 053e896..5ec81f0 100644 --- a/src/sse/image_to_jzazbz.rs +++ b/src/sse/image_to_jzazbz.rs @@ -15,12 +15,12 @@ use erydanos::{_mm_atan2_ps, _mm_hypot_fast_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_p use crate::image::ImageConfiguration; use crate::image_to_jzazbz::JzazbzTarget; use crate::sse::{ - _mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, perform_sse_linear_transfer, - sse_deinterleave_rgb, sse_deinterleave_rgba, sse_interleave_ps_rgb, sse_interleave_ps_rgba, + _mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, sse_interleave_ps_rgb, sse_interleave_ps_rgba, }; +use crate::sse::{sse_deinterleave_rgb_ps, sse_deinterleave_rgba_ps}; use crate::{ - 
load_u8_and_deinterleave, load_u8_and_deinterleave_half, store_and_interleave_v3_direct_f32, - store_and_interleave_v4_direct_f32, TransferFunction, SRGB_TO_XYZ_D65, + load_f32_and_deinterleave, store_and_interleave_v3_direct_f32, + store_and_interleave_v4_direct_f32, SRGB_TO_XYZ_D65, }; macro_rules! perceptual_quantizer { @@ -41,16 +41,8 @@ macro_rules! perceptual_quantizer { } macro_rules! triple_to_jzazbz { - ($r: expr, $g: expr, $b: expr, $transfer: expr, $target: expr, $luminance: expr + ($r: expr, $g: expr, $b: expr,$target: expr, $luminance: expr ) => {{ - let u8_scale = _mm_set1_ps(1f32 / 255f32); - let r_f = _mm_mul_ps(_mm_cvtepi32_ps($r), u8_scale); - let g_f = _mm_mul_ps(_mm_cvtepi32_ps($g), u8_scale); - let b_f = _mm_mul_ps(_mm_cvtepi32_ps($b), u8_scale); - let r_linear = perform_sse_linear_transfer($transfer, r_f); - let g_linear = perform_sse_linear_transfer($transfer,g_f); - let b_linear = perform_sse_linear_transfer($transfer,b_f); - let (x0, x1, x2, x3, x4, x5, x6, x7, x8) = ( _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(0)), _mm_set1_ps(*SRGB_TO_XYZ_D65.get_unchecked(0).get_unchecked(1)), @@ -64,7 +56,7 @@ macro_rules! triple_to_jzazbz { ); let (mut x, mut y, mut z) = _mm_color_matrix_ps( - r_linear, g_linear, b_linear, x0, x1, x2, x3, x4, x5, x6, x7, x8, + $r, $g, $b, x0, x1, x2, x3, x4, x5, x6, x7, x8, ); x = _mm_mul_ps(x, $luminance); @@ -124,13 +116,12 @@ macro_rules! 
triple_to_jzazbz { #[target_feature(enable = "sse4.1")] pub unsafe fn sse_image_to_jzazbz( start_cx: usize, - src: *const u8, + src: *const f32, src_offset: usize, width: u32, dst: *mut f32, dst_offset: usize, display_luminance: f32, - transfer_function: TransferFunction, ) -> usize { let target: JzazbzTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); @@ -141,197 +132,22 @@ pub unsafe fn sse_image_to_jzazbz(r_low)); - let g_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(g_low)); - let b_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); - - let (x_low_high, y_low_high, z_low_high) = triple_to_jzazbz!( - r_low_high, - g_low_high, - b_low_high, - transfer_function, - target, - luminance - ); - - if image_configuration.has_alpha() { - let a_low_high = _mm_mul_ps( - _mm_cvtepi32_ps(_mm_cvtepi16_epi32(_mm_srli_si128::<8>(a_low))), - u8_scale, - ); - - let ptr = dst_ptr.add(cx * 4 + 16); - store_and_interleave_v4_direct_f32!( - ptr, x_low_high, y_low_high, z_low_high, a_low_high - ); - } else { - let ptr = dst_ptr.add(cx * 3 + 4 * 3); - store_and_interleave_v3_direct_f32!(ptr, x_low_high, y_low_high, z_low_high); - } - - let r_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(r_chan)); - let g_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(g_chan)); - let b_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(b_chan)); - - let r_high_low = _mm_cvtepu16_epi32(r_high); - let g_high_low = _mm_cvtepu16_epi32(g_high); - let b_high_low = _mm_cvtepu16_epi32(b_high); - - let (x_high_low, y_high_low, z_high_low) = triple_to_jzazbz!( - r_high_low, - g_high_low, - b_high_low, - transfer_function, - target, - luminance - ); - - let a_high = _mm_cvtepu8_epi16(_mm_srli_si128::<8>(a_chan)); - - if image_configuration.has_alpha() { - let a_high_low = _mm_mul_ps(_mm_cvtepi32_ps(_mm_cvtepi16_epi32(a_high)), u8_scale); - let ptr = dst_ptr.add(cx * 4 + 4 * 4 * 2); - store_and_interleave_v4_direct_f32!( - ptr, x_high_low, y_high_low, z_high_low, 
a_high_low - ); - } else { - let ptr = dst_ptr.add(cx * 3 + 4 * 3 * 2); - store_and_interleave_v3_direct_f32!(ptr, x_high_low, y_high_low, z_high_low); - } - - let r_high_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(r_high)); - let g_high_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(g_high)); - let b_high_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_high)); - - let (x_high_high, y_high_high, z_high_high) = triple_to_jzazbz!( - r_high_high, - g_high_high, - b_high_high, - transfer_function, - target, - luminance - ); - - if image_configuration.has_alpha() { - let a_high_high = _mm_mul_ps( - _mm_cvtepi32_ps(_mm_cvtepi16_epi32(_mm_srli_si128::<8>(a_high))), - u8_scale, - ); - let ptr = dst_ptr.add(cx * 4 + 4 * 4 * 3); - store_and_interleave_v4_direct_f32!( - ptr, - x_high_high, - y_high_high, - z_high_high, - a_high_high - ); - } else { - let ptr = dst_ptr.add(cx * 3 + 4 * 3 * 3); - store_and_interleave_v3_direct_f32!(ptr, x_high_high, y_high_high, z_high_high); - } - - cx += 16; - } - - while cx + 8 < width as usize { - let src_ptr = src.add(src_offset + cx * channels); + while cx + 4 < width as usize { + let src_ptr = ((src as *const u8).add(src_offset) as *const f32).add(cx * channels); let (r_chan, g_chan, b_chan, a_chan) = - load_u8_and_deinterleave_half!(src_ptr, image_configuration); - - let r_low = _mm_cvtepu8_epi16(r_chan); - let g_low = _mm_cvtepu8_epi16(g_chan); - let b_low = _mm_cvtepu8_epi16(b_chan); - - let r_low_low = _mm_cvtepu16_epi32(r_low); - let g_low_low = _mm_cvtepu16_epi32(g_low); - let b_low_low = _mm_cvtepu16_epi32(b_low); - - let (x_low_low, y_low_low, z_low_low) = triple_to_jzazbz!( - r_low_low, - g_low_low, - b_low_low, - transfer_function, - target, - luminance - ); - - let a_low = _mm_cvtepu8_epi16(a_chan); - - let u8_scale = _mm_set1_ps(1f32 / 255f32); + load_f32_and_deinterleave!(src_ptr, image_configuration); + let (x_low_low, y_low_low, z_low_low) = + triple_to_jzazbz!(r_chan, g_chan, b_chan, target, luminance); if 
image_configuration.has_alpha() { - let a_low_low = _mm_mul_ps(_mm_cvtepi32_ps(_mm_cvtepi16_epi32(a_low)), u8_scale); let ptr = dst_ptr.add(cx * 4); - store_and_interleave_v4_direct_f32!(ptr, x_low_low, y_low_low, z_low_low, a_low_low); + store_and_interleave_v4_direct_f32!(ptr, x_low_low, y_low_low, z_low_low, a_chan); } else { let ptr = dst_ptr.add(cx * 3); store_and_interleave_v3_direct_f32!(ptr, x_low_low, y_low_low, z_low_low); } - let r_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(r_low)); - let g_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(g_low)); - let b_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); - - let (x_low_high, y_low_high, z_low_high) = triple_to_jzazbz!( - r_low_high, - g_low_high, - b_low_high, - transfer_function, - target, - luminance - ); - - if image_configuration.has_alpha() { - let a_low_high = _mm_mul_ps( - _mm_cvtepi32_ps(_mm_cvtepi16_epi32(_mm_srli_si128::<8>(a_low))), - u8_scale, - ); - - let ptr = dst_ptr.add(cx * 4 + 16); - store_and_interleave_v4_direct_f32!( - ptr, x_low_high, y_low_high, z_low_high, a_low_high - ); - } else { - let ptr = dst_ptr.add(cx * 3 + 4 * 3); - store_and_interleave_v3_direct_f32!(ptr, x_low_high, y_low_high, z_low_high); - } - - cx += 8; + cx += 4; } cx diff --git a/src/sse/jzazbz_to_image.rs b/src/sse/jzazbz_to_image.rs index b101bee..edc745b 100644 --- a/src/sse/jzazbz_to_image.rs +++ b/src/sse/jzazbz_to_image.rs @@ -10,18 +10,18 @@ use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -use erydanos::{_mm_cos_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_pow_ps, _mm_sin_ps}; - use crate::image::ImageConfiguration; use crate::image_to_jzazbz::JzazbzTarget; use crate::sse::{ - _mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, perform_sse_gamma_transfer, - sse_deinterleave_rgb_ps, sse_deinterleave_rgba_ps, sse_interleave_rgb, sse_interleave_rgba, + _mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, sse_deinterleave_rgb_ps, + sse_deinterleave_rgba_ps, }; +use 
crate::sse::{sse_interleave_ps_rgb, sse_interleave_ps_rgba}; use crate::{ - load_f32_and_deinterleave_direct, store_and_interleave_v3_half_u8, store_and_interleave_v3_u8, - store_and_interleave_v4_half_u8, store_and_interleave_v4_u8, TransferFunction, XYZ_TO_SRGB_D65, + load_f32_and_deinterleave_direct, store_and_interleave_v3_f32, store_and_interleave_v4_f32 + , XYZ_TO_SRGB_D65, }; +use erydanos::{_mm_cos_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_pow_ps, _mm_sin_ps}; macro_rules! perceptual_quantizer_inverse { ($color: expr) => {{ @@ -46,15 +46,12 @@ macro_rules! perceptual_quantizer_inverse { #[inline(always)] unsafe fn sse_jzazbz_vld( src: *const f32, - transfer_function: TransferFunction, luminance_scale: __m128, -) -> (__m128i, __m128i, __m128i, __m128i) { +) -> (__m128, __m128, __m128, __m128) { let target: JzazbzTarget = TARGET.into(); - let v_scale_alpha = _mm_set1_ps(255f32); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let (jz, mut az, mut bz, mut a_f32) = - load_f32_and_deinterleave_direct!(src, image_configuration); + let (jz, mut az, mut bz, a_f32) = load_f32_and_deinterleave_direct!(src, image_configuration); if target == JzazbzTarget::Jzczhz { let cz = az; @@ -120,34 +117,7 @@ unsafe fn sse_jzazbz_vld( ); let (r_l, g_l, b_l) = _mm_color_matrix_ps(x, y, z, x0, x1, x2, x3, x4, x5, x6, x7, x8); - - let mut r_f32 = perform_sse_gamma_transfer(transfer_function, r_l); - let mut g_f32 = perform_sse_gamma_transfer(transfer_function, g_l); - let mut b_f32 = perform_sse_gamma_transfer(transfer_function, b_l); - r_f32 = _mm_mul_ps(r_f32, v_scale_alpha); - g_f32 = _mm_mul_ps(g_f32, v_scale_alpha); - b_f32 = _mm_mul_ps(b_f32, v_scale_alpha); - if image_configuration.has_alpha() { - a_f32 = _mm_mul_ps(a_f32, v_scale_alpha); - } - - const ROUNDING_FLAGS: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; - - if image_configuration.has_alpha() { - ( - _mm_cvtps_epi32(_mm_round_ps::(r_f32)), - _mm_cvtps_epi32(_mm_round_ps::(g_f32)), - 
_mm_cvtps_epi32(_mm_round_ps::(b_f32)), - _mm_cvtps_epi32(_mm_round_ps::(a_f32)), - ) - } else { - ( - _mm_cvtps_epi32(_mm_round_ps::(r_f32)), - _mm_cvtps_epi32(_mm_round_ps::(g_f32)), - _mm_cvtps_epi32(_mm_round_ps::(b_f32)), - _mm_set1_epi32(255), - ) - } + (r_l, g_l, b_l, a_f32) } #[target_feature(enable = "sse4.1")] @@ -155,11 +125,10 @@ pub unsafe fn sse_jzazbz_to_image usize { let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); let channels = image_configuration.get_channels_count(); @@ -167,113 +136,28 @@ pub unsafe fn sse_jzazbz_to_image( - src_ptr_0, - transfer_function, - luminance_scale, - ); - - let src_ptr_1 = offset_src_ptr.add(4 * channels); - - let (r_row1_, g_row1_, b_row1_, a_row1_) = sse_jzazbz_vld::( - src_ptr_1, - transfer_function, - luminance_scale, - ); - - let src_ptr_2 = offset_src_ptr.add(4 * 2 * channels); - - let (r_row2_, g_row2_, b_row2_, a_row2_) = sse_jzazbz_vld::( - src_ptr_2, - transfer_function, - luminance_scale, - ); - - let src_ptr_3 = offset_src_ptr.add(4 * 3 * channels); - - let (r_row3_, g_row3_, b_row3_, a_row3_) = sse_jzazbz_vld::( - src_ptr_3, - transfer_function, - luminance_scale, - ); - - let r_row01 = _mm_packus_epi32(r_row0_, r_row1_); - let g_row01 = _mm_packus_epi32(g_row0_, g_row1_); - let b_row01 = _mm_packus_epi32(b_row0_, b_row1_); - - let r_row23 = _mm_packus_epi32(r_row2_, r_row3_); - let g_row23 = _mm_packus_epi32(g_row2_, g_row3_); - let b_row23 = _mm_packus_epi32(b_row2_, b_row3_); - - let r_row = _mm_packus_epi16(r_row01, r_row23); - let g_row = _mm_packus_epi16(g_row01, g_row23); - let b_row = _mm_packus_epi16(b_row01, b_row23); - - let dst_ptr = dst.add(dst_offset as usize + cx * channels); - - if image_configuration.has_alpha() { - let a_row01 = _mm_packus_epi32(a_row0_, a_row1_); - let a_row23 = _mm_packus_epi32(a_row2_, a_row3_); - let a_row = _mm_packus_epi16(a_row01, a_row23); - store_and_interleave_v4_u8!(dst_ptr, image_configuration, r_row, g_row, b_row, a_row); - } else 
{ - store_and_interleave_v3_u8!(dst_ptr, image_configuration, r_row, g_row, b_row); - } - - cx += 16; - } - - let zeros = _mm_setzero_si128(); - while cx + 8 < width as usize { let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx * channels); let src_ptr_0 = offset_src_ptr; - let (r_row0_, g_row0_, b_row0_, a_row0_) = sse_jzazbz_vld::( - src_ptr_0, - transfer_function, - luminance_scale, - ); - - let src_ptr_1 = offset_src_ptr.add(4 * channels); - - let (r_row1_, g_row1_, b_row1_, a_row1_) = sse_jzazbz_vld::( - src_ptr_1, - transfer_function, - luminance_scale, - ); - - let r_row01 = _mm_packus_epi32(r_row0_, r_row1_); - let g_row01 = _mm_packus_epi32(g_row0_, g_row1_); - let b_row01 = _mm_packus_epi32(b_row0_, b_row1_); - - let r_row = _mm_packus_epi16(r_row01, zeros); - let g_row = _mm_packus_epi16(g_row01, zeros); - let b_row = _mm_packus_epi16(b_row01, zeros); + let (r_row0_, g_row0_, b_row0_, a_row0_) = + sse_jzazbz_vld::(src_ptr_0, luminance_scale); - let dst_ptr = dst.add(dst_offset as usize + cx * channels); + let dst_ptr = ((dst as *mut u8).add(dst_offset as usize) as *mut f32).add(cx * channels); if image_configuration.has_alpha() { - let a_row01 = _mm_packus_epi32(a_row0_, a_row1_); - let a_row = _mm_packus_epi16(a_row01, zeros); - store_and_interleave_v4_half_u8!( + store_and_interleave_v4_f32!( dst_ptr, image_configuration, - r_row, - g_row, - b_row, - a_row + r_row0_, + g_row0_, + b_row0_, + a_row0_ ); } else { - store_and_interleave_v3_half_u8!(dst_ptr, image_configuration, r_row, g_row, b_row); + store_and_interleave_v3_f32!(dst_ptr, image_configuration, r_row0_, g_row0_, b_row0_); } cx += 8; diff --git a/src/sse/linear_to_planar.rs b/src/sse/linear_to_planar.rs deleted file mode 100644 index fa5e615..0000000 --- a/src/sse/linear_to_planar.rs +++ /dev/null @@ -1,84 +0,0 @@ -/* - * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. 
- * // - * // Use of this source code is governed by a BSD-style - * // license that can be found in the LICENSE file. - */ - -use crate::sse::{_mm_loadu_ps_x4, _mm_storeu_si128_x4, perform_sse_gamma_transfer}; -use crate::TransferFunction; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -#[inline(always)] -unsafe fn transfer_to_gamma(r: __m128, transfer_function: TransferFunction) -> __m128i { - const ROUNDING_FLAGS: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; - _mm_cvtps_epi32(_mm_round_ps::(_mm_mul_ps( - perform_sse_gamma_transfer(transfer_function, r), - _mm_set1_ps(255f32), - ))) -} - -#[inline(always)] -unsafe fn process_set( - k: (__m128, __m128, __m128, __m128), - transfer_function: TransferFunction, -) -> __m128i { - let y0 = transfer_to_gamma(k.0, transfer_function); - let y1 = transfer_to_gamma(k.1, transfer_function); - let y2 = transfer_to_gamma(k.2, transfer_function); - let y3 = transfer_to_gamma(k.3, transfer_function); - - let y_row01 = _mm_packus_epi32(y0, y1); - let y_row23 = _mm_packus_epi32(y2, y3); - - _mm_packus_epi16(y_row01, y_row23) -} - -#[target_feature(enable = "sse4.1")] -pub unsafe fn sse_linear_plane_to_gamma( - start_cx: usize, - src: *const f32, - src_offset: u32, - dst: *mut u8, - dst_offset: u32, - width: u32, - transfer_function: TransferFunction, -) -> usize { - let mut cx = start_cx; - - while cx + 64 < width as usize { - let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); - - let pixel_row0 = _mm_loadu_ps_x4(offset_src_ptr); - let pixel_row1 = _mm_loadu_ps_x4(offset_src_ptr.add(16)); - let pixel_row2 = _mm_loadu_ps_x4(offset_src_ptr.add(32)); - let pixel_row3 = _mm_loadu_ps_x4(offset_src_ptr.add(48)); - - let set0 = process_set(pixel_row0, transfer_function); - let set1 = process_set(pixel_row1, transfer_function); - let set2 = process_set(pixel_row2, transfer_function); - let set3 = process_set(pixel_row3, 
transfer_function); - - let dst_ptr = dst.add(dst_offset as usize + cx); - - _mm_storeu_si128_x4(dst_ptr, (set0, set1, set2, set3)); - - cx += 64; - } - - while cx + 16 < width as usize { - let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); - - let pixel_row = _mm_loadu_ps_x4(offset_src_ptr); - let r_row = process_set(pixel_row, transfer_function); - let dst_ptr = dst.add(dst_offset as usize + cx); - _mm_storeu_si128(dst_ptr as *mut __m128i, r_row); - - cx += 16; - } - - cx -} diff --git a/src/sse/planar_to_linear.rs b/src/sse/planar_to_linear.rs deleted file mode 100644 index 11d91b0..0000000 --- a/src/sse/planar_to_linear.rs +++ /dev/null @@ -1,92 +0,0 @@ -/* - * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. - * // - * // Use of this source code is governed by a BSD-style - * // license that can be found in the LICENSE file. - */ - -use crate::sse::{_mm_loadu_si128_x4, _mm_storeu_ps_x4, perform_sse_linear_transfer}; -use crate::TransferFunction; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -#[inline(always)] -unsafe fn sse_to_linear(r: __m128i, transfer_function: TransferFunction) -> __m128 { - let r_f = _mm_mul_ps(_mm_cvtepi32_ps(r), _mm_set1_ps(1f32 / 255f32)); - perform_sse_linear_transfer(transfer_function, r_f) -} - -#[inline] -unsafe fn process_pixels( - pixels: __m128i, - transfer_function: TransferFunction, -) -> (__m128, __m128, __m128, __m128) { - let zeros = _mm_setzero_si128(); - let r_low = _mm_unpacklo_epi8(pixels, zeros); - - let r_low_low = _mm_unpacklo_epi16(r_low, zeros); - - let x_low_low = sse_to_linear(r_low_low, transfer_function); - - let r_low_high = _mm_unpackhi_epi16(r_low, zeros); - - let x_low_high = sse_to_linear(r_low_high, transfer_function); - - let r_high = _mm_unpackhi_epi8(pixels, zeros); - - let r_high_low = _mm_unpacklo_epi16(r_high, zeros); - - let x_high_low = sse_to_linear(r_high_low, 
transfer_function); - - let r_high_high = _mm_unpackhi_epi16(r_high, zeros); - - let x_high_high = sse_to_linear(r_high_high, transfer_function); - - (x_low_low, x_low_high, x_high_low, x_high_high) -} - -#[target_feature(enable = "sse4.1")] -pub unsafe fn sse_plane_to_linear( - start_cx: usize, - src: *const u8, - src_offset: usize, - width: u32, - dst: *mut f32, - dst_offset: usize, - transfer_function: TransferFunction, -) -> usize { - let mut cx = start_cx; - - let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32; - - while cx + 64 < width as usize { - let src_ptr = src.add(src_offset + cx); - let pixels_row64 = _mm_loadu_si128_x4(src_ptr); - let storing_row0 = process_pixels(pixels_row64.0, transfer_function); - _mm_storeu_ps_x4(dst_ptr.add(cx), storing_row0); - - let storing_row1 = process_pixels(pixels_row64.1, transfer_function); - _mm_storeu_ps_x4(dst_ptr.add(cx + 16), storing_row1); - - let storing_row2 = process_pixels(pixels_row64.2, transfer_function); - _mm_storeu_ps_x4(dst_ptr.add(cx + 32), storing_row2); - - let storing_row3 = process_pixels(pixels_row64.3, transfer_function); - _mm_storeu_ps_x4(dst_ptr.add(cx + 48), storing_row3); - - cx += 64; - } - - while cx + 16 < width as usize { - let src_ptr = src.add(src_offset + cx); - let pixels = _mm_loadu_si128(src_ptr as *const __m128i); - let storing_row = process_pixels(pixels, transfer_function); - _mm_storeu_ps_x4(dst_ptr.add(cx), storing_row); - - cx += 16; - } - - cx -} diff --git a/src/xyz_lab_to_image.rs b/src/xyz_lab_to_image.rs index 3cd011b..3d9acfa 100644 --- a/src/xyz_lab_to_image.rs +++ b/src/xyz_lab_to_image.rs @@ -159,17 +159,35 @@ fn xyz_to_channels( src, @@ -247,8 +285,8 @@ pub fn lab_with_alpha_to_rgba( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -261,6 +299,8 @@ pub fn lab_with_alpha_to_rgba( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * 
`matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn lab_with_alpha_to_bgra( src: &[f32], src_stride: u32, @@ -268,6 +308,8 @@ pub fn lab_with_alpha_to_bgra( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Lab as u8 }>( src, @@ -276,8 +318,8 @@ pub fn lab_with_alpha_to_bgra( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -290,6 +332,8 @@ pub fn lab_with_alpha_to_bgra( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn luv_with_alpha_to_rgba( src: &[f32], src_stride: u32, @@ -297,6 +341,8 @@ pub fn luv_with_alpha_to_rgba( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Luv as u8 }>( src, @@ -305,8 +351,8 @@ pub fn luv_with_alpha_to_rgba( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -321,6 +367,8 @@ pub fn luv_with_alpha_to_rgba( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn luv_with_alpha_to_bgra( src: &[f32], src_stride: u32, @@ -328,6 +376,8 @@ pub fn luv_with_alpha_to_bgra( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Lab as u8 }>( src, @@ -336,8 +386,8 @@ pub fn luv_with_alpha_to_bgra( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -350,6 +400,8 @@ pub fn luv_with_alpha_to_bgra( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn xyz_with_alpha_to_rgba( src: &[f32], src_stride: u32, @@ -357,6 +409,8 @@ pub fn xyz_with_alpha_to_rgba( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Xyz as u8 }>( src, @@ -365,8 +419,8 @@ pub fn xyz_with_alpha_to_rgba( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -379,6 +433,8 @@ pub fn xyz_with_alpha_to_rgba( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn xyz_with_alpha_to_bgra( src: &[f32], src_stride: u32, @@ -386,6 +442,8 @@ pub fn xyz_with_alpha_to_bgra( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Xyz as u8 }>( src, @@ -394,8 +452,8 @@ pub fn xyz_with_alpha_to_bgra( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -408,6 +466,8 @@ pub fn xyz_with_alpha_to_bgra( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. If you don't have specific pick `Srgb` pub fn lch_with_alpha_to_rgba( src: &[f32], src_stride: u32, @@ -415,6 +475,8 @@ pub fn lch_with_alpha_to_rgba( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Rgba as u8 }, { XyzTarget::Lch as u8 }>( src, @@ -423,8 +485,8 @@ pub fn lch_with_alpha_to_rgba( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); } @@ -437,6 +499,8 @@ pub fn lch_with_alpha_to_rgba( /// * `dst_stride` - Bytes per row for dst data /// * `width` - Image width /// * `height` - Image height +/// * `matrix` - Transformation matrix from RGB to XYZ. If you don't have specific just pick `XYZ_TO_SRGB_D65` +/// * `transfer_function` - Transfer function. 
If you don't have specific pick `Srgb` pub fn lch_with_alpha_to_bgra( src: &[f32], src_stride: u32, @@ -444,6 +508,8 @@ pub fn lch_with_alpha_to_bgra( dst_stride: u32, width: u32, height: u32, + matrix: &[[f32; 3]; 3], + transfer_function: TransferFunction, ) { xyz_with_alpha_to_channels::<{ ImageConfiguration::Bgra as u8 }, { XyzTarget::Lch as u8 }>( src, @@ -452,7 +518,7 @@ pub fn lch_with_alpha_to_bgra( dst_stride, width, height, - &XYZ_TO_SRGB_D65, - TransferFunction::Srgb, + matrix, + transfer_function, ); }