diff --git a/src/avx/to_xyz_lab.rs b/src/avx/to_xyz_lab.rs index d89348d..21b9634 100644 --- a/src/avx/to_xyz_lab.rs +++ b/src/avx/to_xyz_lab.rs @@ -5,13 +5,10 @@ use std::arch::x86_64::*; use crate::avx::gamma_curves::get_avx2_linear_transfer; use crate::avx::*; -#[allow(unused_imports)] use crate::gamma_curves::TransferFunction; -#[allow(unused_imports)] use crate::image::ImageConfiguration; -#[allow(unused_imports)] -use crate::image_to_xyz_lab::XyzTarget; use crate::luv::{LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y}; +use crate::xyz_target::XyzTarget; #[inline(always)] unsafe fn avx2_triple_to_xyz( @@ -200,6 +197,7 @@ pub unsafe fn avx2_image_to_xyz_lab< y_low_low = u; z_low_low = v; } + XyzTarget::LCH => {} } let write_dst_ptr = dst_ptr.add(cx * 3); @@ -233,6 +231,7 @@ pub unsafe fn avx2_image_to_xyz_lab< y_low_high = u; z_low_high = v; } + XyzTarget::LCH => {} } let (v0, v1, v2) = avx2_interleave_rgb_ps(x_low_high, y_low_high, z_low_high); @@ -267,6 +266,7 @@ pub unsafe fn avx2_image_to_xyz_lab< y_high_low = u; z_high_low = v; } + XyzTarget::LCH => {} } let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_low, y_high_low, z_high_low); @@ -308,6 +308,7 @@ pub unsafe fn avx2_image_to_xyz_lab< y_high_high = u; z_high_high = v; } + XyzTarget::LCH => {} } let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_high, y_high_high, z_high_high); diff --git a/src/avx/xyz_lab_to_image.rs b/src/avx/xyz_lab_to_image.rs index 256f732..7c83cce 100644 --- a/src/avx/xyz_lab_to_image.rs +++ b/src/avx/xyz_lab_to_image.rs @@ -5,7 +5,7 @@ use crate::avx::{ avx2_interleave_rgba_epi8, avx2_pack_s32, avx2_pack_u16, }; use crate::image::ImageConfiguration; -use crate::image_to_xyz_lab::XyzTarget; +use crate::xyz_target::XyzTarget; use crate::TransferFunction; #[cfg(target_arch = "x86")] use std::arch::x86::*; diff --git a/src/avx/xyza_laba_to_image.rs b/src/avx/xyza_laba_to_image.rs index c18efa0..79b2038 100644 --- a/src/avx/xyza_laba_to_image.rs +++ b/src/avx/xyza_laba_to_image.rs @@ 
-10,7 +10,7 @@ use crate::avx::{ avx2_pack_u16, }; use crate::image::ImageConfiguration; -use crate::image_to_xyz_lab::XyzTarget; +use crate::xyz_target::XyzTarget; use crate::TransferFunction; #[inline(always)] diff --git a/src/image_to_xyz_lab.rs b/src/image_to_xyz_lab.rs index f73cf32..aa6de62 100644 --- a/src/image_to_xyz_lab.rs +++ b/src/image_to_xyz_lab.rs @@ -112,40 +112,40 @@ fn channels_to_xyz( - cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - a_channel.as_mut_ptr(), - a_offset, - &matrix, - transfer_function, - ) - } else { - cx = sse_channels_to_xyz_or_lab::( - cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - std::ptr::null_mut(), - 0usize, - &matrix, - transfer_function, - ) - } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.1" + ))] + unsafe { + if _has_sse { + if USE_ALPHA { + cx = sse_channels_to_xyz_or_lab::( + cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + a_channel.as_mut_ptr(), + a_offset, + &matrix, + transfer_function, + ) + } else { + cx = sse_channels_to_xyz_or_lab::( + cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + std::ptr::null_mut(), + 0usize, + &matrix, + transfer_function, + ) } } } diff --git a/src/image_xyza_laba.rs b/src/image_xyza_laba.rs index 3e634a4..60b08d7 100644 --- a/src/image_xyza_laba.rs +++ b/src/image_xyza_laba.rs @@ -56,24 +56,22 @@ fn channels_to_xyz_with_alpha( - cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - &matrix, - transfer_function, - ); - } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.1" + ))] + unsafe { + if _has_sse { + cx = sse_channels_to_xyza_laba::( + cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + &matrix, + transfer_function, + ); } } diff --git a/src/sse/cie.rs b/src/sse/cie.rs new file mode 100644 index 0000000..1981d93 --- 
/dev/null +++ b/src/sse/cie.rs @@ -0,0 +1,183 @@ +use crate::luv::{ + LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y, LUV_MULTIPLIER_INVERSE_Y, LUV_WHITE_U_PRIME, + LUV_WHITE_V_PRIME, +}; +use crate::sse::{ + _mm_atan2_ps, _mm_cbrt_ps, _mm_color_matrix_ps, _mm_cos_ps, _mm_cube_ps, _mm_hypot_ps, + _mm_prefer_fma_ps, _mm_select_ps, _mm_sin_ps, +}; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +#[inline(always)] +pub(crate) unsafe fn sse_triple_to_xyz( + r: __m128i, + g: __m128i, + b: __m128i, + c1: __m128, + c2: __m128, + c3: __m128, + c4: __m128, + c5: __m128, + c6: __m128, + c7: __m128, + c8: __m128, + c9: __m128, + transfer: &unsafe fn(__m128) -> __m128, +) -> (__m128, __m128, __m128) { + let u8_scale = _mm_set1_ps(1f32 / 255f32); + let r_f = _mm_mul_ps(_mm_cvtepi32_ps(r), u8_scale); + let g_f = _mm_mul_ps(_mm_cvtepi32_ps(g), u8_scale); + let b_f = _mm_mul_ps(_mm_cvtepi32_ps(b), u8_scale); + let r_linear = transfer(r_f); + let g_linear = transfer(g_f); + let b_linear = transfer(b_f); + + let (x, y, z) = _mm_color_matrix_ps( + r_linear, g_linear, b_linear, c1, c2, c3, c4, c5, c6, c7, c8, c9, + ); + (x, y, z) +} + +#[inline(always)] +pub(crate) unsafe fn sse_triple_to_luv( + x: __m128, + y: __m128, + z: __m128, +) -> (__m128, __m128, __m128) { + let zeros = _mm_setzero_ps(); + let den = _mm_prefer_fma_ps( + _mm_prefer_fma_ps(x, z, _mm_set1_ps(3f32)), + y, + _mm_set1_ps(15f32), + ); + let nan_mask = _mm_cmpeq_ps(den, _mm_set1_ps(0f32)); + let l_low_mask = _mm_cmplt_ps(y, _mm_set1_ps(LUV_CUTOFF_FORWARD_Y)); + let y_cbrt = _mm_cbrt_ps(y); + let l = _mm_select_ps( + l_low_mask, + _mm_mul_ps(y, _mm_set1_ps(LUV_MULTIPLIER_FORWARD_Y)), + _mm_prefer_fma_ps(_mm_set1_ps(-16f32), y_cbrt, _mm_set1_ps(116f32)), + ); + let u_prime = _mm_div_ps(_mm_mul_ps(x, _mm_set1_ps(4f32)), den); + let v_prime = _mm_div_ps(_mm_mul_ps(y, _mm_set1_ps(9f32)), den); + let sub_u_prime = _mm_sub_ps(u_prime, 
_mm_set1_ps(crate::luv::LUV_WHITE_U_PRIME)); + let sub_v_prime = _mm_sub_ps(v_prime, _mm_set1_ps(crate::luv::LUV_WHITE_V_PRIME)); + let l13 = _mm_mul_ps(l, _mm_set1_ps(13f32)); + let u = _mm_select_ps(nan_mask, zeros, _mm_mul_ps(l13, sub_u_prime)); + let v = _mm_select_ps(nan_mask, zeros, _mm_mul_ps(l13, sub_v_prime)); + (l, u, v) +} + +#[inline(always)] +pub(crate) unsafe fn sse_triple_to_lab( + x: __m128, + y: __m128, + z: __m128, +) -> (__m128, __m128, __m128) { + let x = _mm_mul_ps(x, _mm_set1_ps(100f32 / 95.047f32)); + let y = _mm_mul_ps(y, _mm_set1_ps(100f32 / 100f32)); + let z = _mm_mul_ps(z, _mm_set1_ps(100f32 / 108.883f32)); + let cbrt_x = _mm_cbrt_ps(x); + let cbrt_y = _mm_cbrt_ps(y); + let cbrt_z = _mm_cbrt_ps(z); + let s_1 = _mm_set1_ps(16.0 / 116.0); + let s_2 = _mm_set1_ps(7.787); + let lower_x = _mm_prefer_fma_ps(s_1, s_2, x); + let lower_y = _mm_prefer_fma_ps(s_1, s_2, y); + let lower_z = _mm_prefer_fma_ps(s_1, s_2, z); + let cutoff = _mm_set1_ps(0.008856f32); + let x = _mm_select_ps(_mm_cmpgt_ps(x, cutoff), cbrt_x, lower_x); + let y = _mm_select_ps(_mm_cmpgt_ps(y, cutoff), cbrt_y, lower_y); + let z = _mm_select_ps(_mm_cmpgt_ps(z, cutoff), cbrt_z, lower_z); + let l = _mm_prefer_fma_ps(_mm_set1_ps(-16.0f32), y, _mm_set1_ps(116.0f32)); + let a = _mm_mul_ps(_mm_sub_ps(x, y), _mm_set1_ps(500f32)); + let b = _mm_mul_ps(_mm_sub_ps(y, z), _mm_set1_ps(200f32)); + (l, a, b) +} + +#[inline(always)] +pub(crate) unsafe fn sse_triple_to_lch( + x: __m128, + y: __m128, + z: __m128, +) -> (__m128, __m128, __m128) { + let (luv_l, luv_u, luv_v) = sse_triple_to_luv(x, y, z); + let lch_c = _mm_hypot_ps(luv_u, luv_v); + let lch_h = _mm_atan2_ps(luv_v, luv_u); + (luv_l, lch_c, lch_h) +} + +#[inline(always)] +pub(crate) unsafe fn sse_lab_to_xyz(l: __m128, a: __m128, b: __m128) -> (__m128, __m128, __m128) { + let y = _mm_mul_ps( + _mm_add_ps(l, _mm_set1_ps(16f32)), + _mm_set1_ps(1f32 / 116f32), + ); + let x = _mm_add_ps(_mm_mul_ps(a, _mm_set1_ps(1f32 / 500f32)), y); + let 
z = _mm_sub_ps(y, _mm_mul_ps(b, _mm_set1_ps(1f32 / 200f32))); + let x3 = _mm_cube_ps(x); + let y3 = _mm_cube_ps(y); + let z3 = _mm_cube_ps(z); + let kappa = _mm_set1_ps(0.008856f32); + let k_sub = _mm_set1_ps(16f32 / 116f32); + let mult_1 = _mm_set1_ps(1f32 / 7.787f32); + let low_x = _mm_mul_ps(_mm_sub_ps(x, k_sub), mult_1); + let low_y = _mm_mul_ps(_mm_sub_ps(y, k_sub), mult_1); + let low_z = _mm_mul_ps(_mm_sub_ps(z, k_sub), mult_1); + + let x = _mm_select_ps(_mm_cmpgt_ps(x3, kappa), x3, low_x); + let y = _mm_select_ps(_mm_cmpgt_ps(y3, kappa), y3, low_y); + let z = _mm_select_ps(_mm_cmpgt_ps(z3, kappa), z3, low_z); + let x = _mm_mul_ps(x, _mm_set1_ps(95.047f32 / 100f32)); + let z = _mm_mul_ps(z, _mm_set1_ps(108.883f32 / 100f32)); + (x, y, z) +} + +#[inline(always)] +pub(crate) unsafe fn sse_luv_to_xyz(l: __m128, u: __m128, v: __m128) -> (__m128, __m128, __m128) { + let zeros = _mm_setzero_ps(); + let zero_mask = _mm_cmpeq_ps(l, zeros); + let l13 = _mm_rcp_ps(_mm_mul_ps(l, _mm_set1_ps(13f32))); + let u = _mm_prefer_fma_ps(_mm_set1_ps(LUV_WHITE_U_PRIME), l13, u); + let v = _mm_prefer_fma_ps(_mm_set1_ps(LUV_WHITE_V_PRIME), l13, v); + let l_h = _mm_mul_ps( + _mm_add_ps(l, _mm_set1_ps(16f32)), + _mm_set1_ps(1f32 / 116f32), + ); + let y_high = _mm_mul_ps(_mm_mul_ps(l_h, l_h), l_h); + let y_low = _mm_mul_ps(l, _mm_set1_ps(LUV_MULTIPLIER_INVERSE_Y)); + let y = _mm_select_ps( + zero_mask, + zeros, + _mm_select_ps(_mm_cmpgt_ps(l, _mm_set1_ps(8f32)), y_high, y_low), + ); + let zero_mask_2 = _mm_cmpeq_ps(v, zeros); + let den = _mm_rcp_ps(_mm_mul_ps(v, _mm_set1_ps(4f32))); + let mut x = _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(y, u), den), _mm_set1_ps(9f32)); + x = _mm_select_ps(zero_mask, zeros, x); + x = _mm_select_ps(zero_mask_2, zeros, x); + let mut z = _mm_mul_ps( + _mm_mul_ps( + _mm_prefer_fma_ps( + _mm_prefer_fma_ps(_mm_set1_ps(12f32), _mm_set1_ps(-3f32), u), + v, + _mm_set1_ps(-20f32), + ), + y, + ), + den, + ); + z = _mm_select_ps(zero_mask, zeros, z); + z = 
_mm_select_ps(zero_mask_2, zeros, z); + (x, y, z) +} + +#[inline(always)] +pub(crate) unsafe fn sse_lch_to_xyz(l: __m128, c: __m128, h: __m128) -> (__m128, __m128, __m128) { + let u = _mm_mul_ps(c, _mm_cos_ps(h)); + let v = _mm_mul_ps(c, _mm_sin_ps(h)); + sse_luv_to_xyz(l, u, v) +} diff --git a/src/sse/color.rs b/src/sse/color.rs index aeb67bd..517f6e5 100644 --- a/src/sse/color.rs +++ b/src/sse/color.rs @@ -1,78 +1,9 @@ +use crate::sse::{_mm_abs_ps, _mm_fmod_ps, _mm_prefer_fma_ps, _mm_select_ps}; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -use crate::luv::{LUV_MULTIPLIER_INVERSE_Y, LUV_WHITE_U_PRIME, LUV_WHITE_V_PRIME}; -use crate::sse::{_mm_abs_ps, _mm_cube_ps, _mm_fmod_ps, _mm_prefer_fma_ps, _mm_select_ps}; - -#[inline(always)] -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -pub(crate) unsafe fn sse_lab_to_xyz(l: __m128, a: __m128, b: __m128) -> (__m128, __m128, __m128) { - let y = _mm_mul_ps( - _mm_add_ps(l, _mm_set1_ps(16f32)), - _mm_set1_ps(1f32 / 116f32), - ); - let x = _mm_add_ps(_mm_mul_ps(a, _mm_set1_ps(1f32 / 500f32)), y); - let z = _mm_sub_ps(y, _mm_mul_ps(b, _mm_set1_ps(1f32 / 200f32))); - let x3 = _mm_cube_ps(x); - let y3 = _mm_cube_ps(y); - let z3 = _mm_cube_ps(z); - let kappa = _mm_set1_ps(0.008856f32); - let k_sub = _mm_set1_ps(16f32 / 116f32); - let mult_1 = _mm_set1_ps(1f32 / 7.787f32); - let low_x = _mm_mul_ps(_mm_sub_ps(x, k_sub), mult_1); - let low_y = _mm_mul_ps(_mm_sub_ps(y, k_sub), mult_1); - let low_z = _mm_mul_ps(_mm_sub_ps(z, k_sub), mult_1); - - let x = _mm_select_ps(_mm_cmpgt_ps(x3, kappa), x3, low_x); - let y = _mm_select_ps(_mm_cmpgt_ps(y3, kappa), y3, low_y); - let z = _mm_select_ps(_mm_cmpgt_ps(z3, kappa), z3, low_z); - let x = _mm_mul_ps(x, _mm_set1_ps(95.047f32 / 100f32)); - let z = _mm_mul_ps(z, _mm_set1_ps(108.883f32 / 100f32)); - (x, y, z) -} - -#[inline(always)] -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -pub(crate) unsafe fn 
sse_luv_to_xyz(l: __m128, u: __m128, v: __m128) -> (__m128, __m128, __m128) { - let zeros = _mm_setzero_ps(); - let zero_mask = _mm_cmpeq_ps(l, zeros); - let l13 = _mm_rcp_ps(_mm_mul_ps(l, _mm_set1_ps(13f32))); - let u = _mm_prefer_fma_ps(_mm_set1_ps(LUV_WHITE_U_PRIME), l13, u); - let v = _mm_prefer_fma_ps(_mm_set1_ps(LUV_WHITE_V_PRIME), l13, v); - let l_h = _mm_mul_ps( - _mm_add_ps(l, _mm_set1_ps(16f32)), - _mm_set1_ps(1f32 / 116f32), - ); - let y_high = _mm_mul_ps(_mm_mul_ps(l_h, l_h), l_h); - let y_low = _mm_mul_ps(l, _mm_set1_ps(LUV_MULTIPLIER_INVERSE_Y)); - let y = _mm_select_ps( - zero_mask, - zeros, - _mm_select_ps(_mm_cmpgt_ps(l, _mm_set1_ps(8f32)), y_high, y_low), - ); - let zero_mask_2 = _mm_cmpeq_ps(v, zeros); - let den = _mm_rcp_ps(_mm_mul_ps(v, _mm_set1_ps(4f32))); - let mut x = _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(y, u), den), _mm_set1_ps(9f32)); - x = _mm_select_ps(zero_mask, zeros, x); - x = _mm_select_ps(zero_mask_2, zeros, x); - let mut z = _mm_mul_ps( - _mm_mul_ps( - _mm_prefer_fma_ps( - _mm_prefer_fma_ps(_mm_set1_ps(12f32), _mm_set1_ps(-3f32), u), - v, - _mm_set1_ps(-20f32), - ), - y, - ), - den, - ); - z = _mm_select_ps(zero_mask, zeros, z); - z = _mm_select_ps(zero_mask_2, zeros, z); - (x, y, z) -} - #[inline(always)] pub unsafe fn sse_hsl_to_rgb( h: __m128, diff --git a/src/sse/math.rs b/src/sse/math.rs index 1642010..8a1281a 100644 --- a/src/sse/math.rs +++ b/src/sse/math.rs @@ -444,7 +444,6 @@ pub unsafe fn _mm_cos_ps(d: __m128) -> __m128 { let s = _mm_mul_ps(d, d); - // TODO: Perform float masking instead d = _mm_castsi128_ps(_mm_xor_si128( _mm_and_si128( _mm_cmpeq_epi32(_mm_and_si128(q, _mm_set1_epi32(2)), _mm_set1_epi32(0)), @@ -464,3 +463,139 @@ pub unsafe fn _mm_cos_ps(d: __m128) -> __m128 { return u; } + +#[inline(always)] +pub unsafe fn _mm_hypot_ps(x: __m128, y: __m128) -> __m128 { + let xp2 = _mm_mul_ps(x, x); + let yp2 = _mm_mul_ps(y, y); + let z = _mm_add_ps(xp2, yp2); + return _mm_sqrt_ps(z); +} + +#[inline(always)] +pub unsafe fn 
_mm_poly4_ps( + x: __m128, + x2: __m128, + c3: __m128, + c2: __m128, + c1: __m128, + c0: __m128, +) -> __m128 { + _mm_fmaf_ps(x2, _mm_fmaf_ps(x, c3, c2), _mm_fmaf_ps(x, c1, c0)) +} + +#[inline(always)] +pub unsafe fn _mm_poly8q_ps( + x: __m128, + x2: __m128, + x4: __m128, + c7: __m128, + c6: __m128, + c5: __m128, + c4: __m128, + c3: __m128, + c2: __m128, + c1: __m128, + c0: __m128, +) -> __m128 { + _mm_fmaf_ps( + x4, + _mm_poly4_ps(x, x2, c7, c6, c5, c4), + _mm_poly4_ps(x, x2, c3, c2, c1, c0), + ) +} + +#[inline(always)] +unsafe fn _mm_atan2q_ps_impl(y: __m128, x: __m128) -> __m128 { + let q = _mm_select_si128( + _mm_castps_si128(_mm_cmplt_ps(x, _mm_setzero_ps())), + _mm_set1_epi32(-2), + _mm_set1_epi32(0), + ); + let x = _mm_abs_ps(x); + let is_y_more_than_x = _mm_cmpgt_ps(y, x); + let t = _mm_select_ps(is_y_more_than_x, x, _mm_setzero_ps()); + let x = _mm_select_ps(is_y_more_than_x, y, x); + let y = _mm_select_ps(is_y_more_than_x, _mm_neg_ps(t), y); + let q = _mm_select_si128( + _mm_castps_si128(is_y_more_than_x), + _mm_add_epi32(q, _mm_set1_epi32(1)), + q, + ); + let s = _mm_div_ps(y, x); + let t = _mm_mul_ps(s, s); + let t2 = _mm_mul_ps(t, t); + let t4 = _mm_mul_ps(t2, t2); + let poly = _mm_poly8q_ps( + t, + t2, + t4, + _mm_set1_ps(0.00282363896258175373077393f32), + _mm_set1_ps(-0.0159569028764963150024414f32), + _mm_set1_ps(0.0425049886107444763183594f32), + _mm_set1_ps(-0.0748900920152664184570312f32), + _mm_set1_ps(0.106347933411598205566406f32), + _mm_set1_ps(-0.142027363181114196777344f32), + _mm_set1_ps(0.199926957488059997558594f32), + _mm_set1_ps(-0.333331018686294555664062f32), + ); + let t = _mm_prefer_fma_ps(s, _mm_mul_ps(poly, t), s); + let t = _mm_prefer_fma_ps( + t, + _mm_cvtepi32_ps(q), + _mm_set1_ps(std::f32::consts::FRAC_PI_2), + ); + t +} + +#[inline(always)] +pub unsafe fn _mm_atan2_ps(y: __m128, x: __m128) -> __m128 { + let r = _mm_atan2q_ps_impl(_mm_abs_ps(y), x); + let r = _mm_mulsign_ps(r, x); + _mm_mulsign_ps(r, y) +} + 
+#[inline(always)] +pub unsafe fn _mm_sin_ps(val: __m128) -> __m128 { + let pi_v = _mm_set1_ps(std::f32::consts::PI); + let pio2_v = _mm_set1_ps(std::f32::consts::FRAC_PI_2); + let ipi_v = _mm_set1_ps(std::f32::consts::FRAC_1_PI); + + //Find positive or negative + let c_v = _mm_abs_epi32(_mm_cvtps_epi32(_mm_mul_ps(val, ipi_v))); + let sign_v = _mm_castps_si128(_mm_cmple_ps(val, _mm_setzero_ps())); + let odd_v = _mm_and_si128(c_v, _mm_set1_epi32(1)); + + let neg_v = _mm_xor_si128(odd_v, sign_v); + + //Modulus a - (n * int(a*(1/n))) + let mut ma = _mm_sub_ps(_mm_abs_ps(val), _mm_mul_ps(pi_v, _mm_cvtepi32_ps(c_v))); + let reb_v = _mm_cmpge_ps(ma, pio2_v); + + //Rebase a between 0 and pi/2 + ma = _mm_select_ps(reb_v, _mm_sub_ps(pi_v, ma), ma); + + //Taylor series + let ma2 = _mm_mul_ps(ma, ma); + + //2nd elem: x^3 / 3! + let mut elem = _mm_mul_ps(_mm_mul_ps(ma, ma2), _mm_set1_ps(0.166666666666f32)); + let mut res = _mm_sub_ps(ma, elem); + + //3rd elem: x^5 / 5! + elem = _mm_mul_ps(_mm_mul_ps(elem, ma2), _mm_set1_ps(0.05f32)); + res = _mm_add_ps(res, elem); + + //4th elem: x^7 / 7! + elem = _mm_mul_ps(_mm_mul_ps(elem, ma2), _mm_set1_ps(0.023809523810f32)); + res = _mm_sub_ps(res, elem); + + //5th elem: x^9 / 9! 
+ elem = _mm_mul_ps(_mm_mul_ps(elem, ma2), _mm_set1_ps(0.013888888889f32)); + res = _mm_add_ps(res, elem); + + //Change of sign + let neg_v = _mm_slli_epi32::<31>(neg_v); + res = _mm_castsi128_ps(_mm_xor_si128(_mm_castps_si128(res), neg_v)); + return res; +} diff --git a/src/sse/mod.rs b/src/sse/mod.rs index 5c13e7b..024129b 100644 --- a/src/sse/mod.rs +++ b/src/sse/mod.rs @@ -31,6 +31,7 @@ mod to_xyza_laba; mod xyz_lab_to_image; +mod cie; mod from_sigmoidal; mod sigmoidal; mod to_sigmoidal; diff --git a/src/sse/to_xyz_lab.rs b/src/sse/to_xyz_lab.rs index 72e1064..9c374ee 100644 --- a/src/sse/to_xyz_lab.rs +++ b/src/sse/to_xyz_lab.rs @@ -1,100 +1,13 @@ use crate::gamma_curves::TransferFunction; use crate::image::ImageConfiguration; -use crate::image_to_xyz_lab::XyzTarget; -use crate::luv::{LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y}; +use crate::sse::cie::{sse_triple_to_lab, sse_triple_to_lch, sse_triple_to_luv, sse_triple_to_xyz}; use crate::sse::*; +use crate::xyz_target::XyzTarget; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -#[inline(always)] -pub(crate) unsafe fn sse_triple_to_xyz( - r: __m128i, - g: __m128i, - b: __m128i, - c1: __m128, - c2: __m128, - c3: __m128, - c4: __m128, - c5: __m128, - c6: __m128, - c7: __m128, - c8: __m128, - c9: __m128, - transfer: &unsafe fn(__m128) -> __m128, -) -> (__m128, __m128, __m128) { - let u8_scale = _mm_set1_ps(1f32 / 255f32); - let r_f = _mm_mul_ps(_mm_cvtepi32_ps(r), u8_scale); - let g_f = _mm_mul_ps(_mm_cvtepi32_ps(g), u8_scale); - let b_f = _mm_mul_ps(_mm_cvtepi32_ps(b), u8_scale); - let r_linear = transfer(r_f); - let g_linear = transfer(g_f); - let b_linear = transfer(b_f); - - let (x, y, z) = _mm_color_matrix_ps( - r_linear, g_linear, b_linear, c1, c2, c3, c4, c5, c6, c7, c8, c9, - ); - (x, y, z) -} - -#[inline(always)] -pub(crate) unsafe fn sse_triple_to_luv( - x: __m128, - y: __m128, - z: __m128, -) -> (__m128, __m128, __m128) { - let zeros = 
_mm_setzero_ps(); - let den = _mm_prefer_fma_ps( - _mm_prefer_fma_ps(x, z, _mm_set1_ps(3f32)), - y, - _mm_set1_ps(15f32), - ); - let nan_mask = _mm_cmpeq_ps(den, _mm_set1_ps(0f32)); - let l_low_mask = _mm_cmplt_ps(y, _mm_set1_ps(LUV_CUTOFF_FORWARD_Y)); - let y_cbrt = _mm_cbrt_ps(y); - let l = _mm_select_ps( - l_low_mask, - _mm_mul_ps(y, _mm_set1_ps(LUV_MULTIPLIER_FORWARD_Y)), - _mm_prefer_fma_ps(_mm_set1_ps(-16f32), y_cbrt, _mm_set1_ps(116f32)), - ); - let u_prime = _mm_div_ps(_mm_mul_ps(x, _mm_set1_ps(4f32)), den); - let v_prime = _mm_div_ps(_mm_mul_ps(y, _mm_set1_ps(9f32)), den); - let sub_u_prime = _mm_sub_ps(u_prime, _mm_set1_ps(crate::luv::LUV_WHITE_U_PRIME)); - let sub_v_prime = _mm_sub_ps(v_prime, _mm_set1_ps(crate::luv::LUV_WHITE_V_PRIME)); - let l13 = _mm_mul_ps(l, _mm_set1_ps(13f32)); - let u = _mm_select_ps(nan_mask, zeros, _mm_mul_ps(l13, sub_u_prime)); - let v = _mm_select_ps(nan_mask, zeros, _mm_mul_ps(l13, sub_v_prime)); - (l, u, v) -} - -#[inline(always)] -pub(crate) unsafe fn sse_triple_to_lab( - x: __m128, - y: __m128, - z: __m128, -) -> (__m128, __m128, __m128) { - let x = _mm_mul_ps(x, _mm_set1_ps(100f32 / 95.047f32)); - let y = _mm_mul_ps(y, _mm_set1_ps(100f32 / 100f32)); - let z = _mm_mul_ps(z, _mm_set1_ps(100f32 / 108.883f32)); - let cbrt_x = _mm_cbrt_ps(x); - let cbrt_y = _mm_cbrt_ps(y); - let cbrt_z = _mm_cbrt_ps(z); - let s_1 = _mm_set1_ps(16.0 / 116.0); - let s_2 = _mm_set1_ps(7.787); - let lower_x = _mm_prefer_fma_ps(s_1, s_2, x); - let lower_y = _mm_prefer_fma_ps(s_1, s_2, y); - let lower_z = _mm_prefer_fma_ps(s_1, s_2, z); - let cutoff = _mm_set1_ps(0.008856f32); - let x = _mm_select_ps(_mm_cmpgt_ps(x, cutoff), cbrt_x, lower_x); - let y = _mm_select_ps(_mm_cmpgt_ps(y, cutoff), cbrt_y, lower_y); - let z = _mm_select_ps(_mm_cmpgt_ps(z, cutoff), cbrt_z, lower_z); - let l = _mm_prefer_fma_ps(_mm_set1_ps(-16.0f32), y, _mm_set1_ps(116.0f32)); - let a = _mm_mul_ps(_mm_sub_ps(x, y), _mm_set1_ps(500f32)); - let b = _mm_mul_ps(_mm_sub_ps(y, z), 
_mm_set1_ps(200f32)); - (l, a, b) -} - #[inline(always)] pub unsafe fn sse_channels_to_xyz_or_lab< const CHANNELS_CONFIGURATION: u8, @@ -199,6 +112,12 @@ pub unsafe fn sse_channels_to_xyz_or_lab< y_low_low = u; z_low_low = v; } + XyzTarget::LCH => { + let (l, c, h) = sse_triple_to_lch(x_low_low, y_low_low, z_low_low); + x_low_low = l; + y_low_low = c; + z_low_low = h; + } } let (v0, v1, v2) = sse_interleave_ps_rgb(x_low_low, y_low_low, z_low_low); @@ -229,6 +148,12 @@ pub unsafe fn sse_channels_to_xyz_or_lab< y_low_high = u; z_low_high = v; } + XyzTarget::LCH => { + let (l, c, h) = sse_triple_to_lch(x_low_high, y_low_high, z_low_high); + x_low_high = l; + y_low_high = c; + z_low_high = h; + } } let (v0, v1, v2) = sse_interleave_ps_rgb(x_low_high, y_low_high, z_low_high); @@ -263,6 +188,12 @@ pub unsafe fn sse_channels_to_xyz_or_lab< y_high_low = u; z_high_low = v; } + XyzTarget::LCH => { + let (l, c, h) = sse_triple_to_lch(x_high_low, y_high_low, z_high_low); + x_high_low = l; + y_high_low = c; + z_high_low = h; + } } let (v0, v1, v2) = sse_interleave_ps_rgb(x_high_low, y_high_low, z_high_low); @@ -304,6 +235,12 @@ pub unsafe fn sse_channels_to_xyz_or_lab< y_high_high = u; z_high_high = v; } + XyzTarget::LCH => { + let (l, c, h) = sse_triple_to_lch(x_high_high, y_high_high, z_high_high); + x_high_high = l; + y_high_high = c; + z_high_high = h; + } } let (v0, v1, v2) = sse_interleave_ps_rgb(x_high_high, y_high_high, z_high_high); diff --git a/src/sse/to_xyza_laba.rs b/src/sse/to_xyza_laba.rs index bbd259d..02255f7 100644 --- a/src/sse/to_xyza_laba.rs +++ b/src/sse/to_xyza_laba.rs @@ -1,18 +1,13 @@ -#[allow(unused_imports)] use crate::gamma_curves::TransferFunction; -#[allow(unused_imports)] use crate::image::ImageConfiguration; -#[allow(unused_imports)] -use crate::image_to_xyz_lab::XyzTarget; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -#[allow(unused_imports)] +use crate::sse::cie::{sse_triple_to_lab, sse_triple_to_lch, sse_triple_to_luv, 
sse_triple_to_xyz}; use crate::sse::*; +use crate::xyz_target::XyzTarget; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] #[inline(always)] pub unsafe fn sse_channels_to_xyza_laba( start_cx: usize, @@ -110,6 +105,12 @@ pub unsafe fn sse_channels_to_xyza_laba { + let (l, c, h) = sse_triple_to_lch(x_low_low, y_low_low, z_low_low); + x_low_low = l; + y_low_low = c; + z_low_low = h; + } } let a_low = _mm_cvtepu8_epi16(a_chan); @@ -145,6 +146,12 @@ pub unsafe fn sse_channels_to_xyza_laba { + let (l, c, h) = sse_triple_to_lch(x_low_high, y_low_high, z_low_high); + x_low_high = l; + y_low_high = c; + z_low_high = h; + } } let a_low_high = _mm_mul_ps( @@ -186,6 +193,12 @@ pub unsafe fn sse_channels_to_xyza_laba { + let (l, c, h) = sse_triple_to_lch(x_high_low, y_high_low, z_high_low); + x_high_low = l; + y_high_low = c; + z_high_low = h; + } } let a_high = _mm_unpackhi_epi8(a_chan, _mm_setzero_si128()); @@ -233,6 +246,12 @@ pub unsafe fn sse_channels_to_xyza_laba { + let (l, c, h) = sse_triple_to_lch(x_high_high, y_high_high, z_high_high); + x_high_high = l; + y_high_high = c; + z_high_high = h; + } } let a_high_high = _mm_mul_ps( diff --git a/src/sse/xyz_lab_to_image.rs b/src/sse/xyz_lab_to_image.rs index 536fdf6..7d7f4a4 100644 --- a/src/sse/xyz_lab_to_image.rs +++ b/src/sse/xyz_lab_to_image.rs @@ -1,10 +1,10 @@ use crate::image::ImageConfiguration; -use crate::image_to_xyz_lab::XyzTarget; -use crate::sse::color::{sse_lab_to_xyz, sse_luv_to_xyz}; +use crate::sse::cie::{sse_lab_to_xyz, sse_lch_to_xyz, sse_luv_to_xyz}; use crate::sse::{ _mm_color_matrix_ps, get_sse_gamma_transfer, sse_deinterleave_rgb_ps, sse_interleave_rgb, sse_interleave_rgba, }; +use crate::xyz_target::XyzTarget; use crate::TransferFunction; #[cfg(target_arch = "x86")] use std::arch::x86::*; @@ -51,6 +51,12 @@ unsafe fn sse_xyz_lab_vld< g_f32 = y; b_f32 = z; } + XyzTarget::LCH => { + let 
(x, y, z) = sse_lch_to_xyz(r_f32, g_f32, b_f32); + r_f32 = x; + g_f32 = y; + b_f32 = z; + } _ => {} } diff --git a/src/sse/xyza_laba_to_image.rs b/src/sse/xyza_laba_to_image.rs index be74ffa..13c746c 100644 --- a/src/sse/xyza_laba_to_image.rs +++ b/src/sse/xyza_laba_to_image.rs @@ -1,9 +1,9 @@ use crate::image::ImageConfiguration; -use crate::image_to_xyz_lab::XyzTarget; -use crate::sse::color::{sse_lab_to_xyz, sse_luv_to_xyz}; +use crate::sse::cie::{sse_lab_to_xyz, sse_lch_to_xyz, sse_luv_to_xyz}; use crate::sse::{ _mm_color_matrix_ps, get_sse_gamma_transfer, sse_deinterleave_rgba_ps, sse_interleave_rgba, }; +use crate::xyz_target::XyzTarget; use crate::TransferFunction; #[cfg(target_arch = "x86")] use std::arch::x86::*; @@ -47,6 +47,12 @@ unsafe fn sse_xyza_lab_vld( g_f32 = y; b_f32 = z; } + XyzTarget::LCH => { + let (x, y, z) = sse_lch_to_xyz(r_f32, g_f32, b_f32); + r_f32 = x; + g_f32 = y; + b_f32 = z; + } _ => {} } diff --git a/src/xyz_lab_to_image.rs b/src/xyz_lab_to_image.rs index 9bf747c..70ebb20 100644 --- a/src/xyz_lab_to_image.rs +++ b/src/xyz_lab_to_image.rs @@ -112,40 +112,40 @@ fn xyz_to_channels( - cx, - src.as_ptr(), - src_offset, - a_channel.as_ptr(), - a_offset, - dst.as_mut_ptr(), - dst_offset, - width, - &matrix, - transfer_function, - ) - } else { - cx = sse_xyz_to_channels::( - cx, - src.as_ptr(), - src_offset, - std::ptr::null(), - 0usize, - dst.as_mut_ptr(), - dst_offset, - width, - &matrix, - transfer_function, - ) - } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.1" + ))] + unsafe { + if _has_sse { + if USE_ALPHA { + cx = sse_xyz_to_channels::( + cx, + src.as_ptr(), + src_offset, + a_channel.as_ptr(), + a_offset, + dst.as_mut_ptr(), + dst_offset, + width, + &matrix, + transfer_function, + ) + } else { + cx = sse_xyz_to_channels::( + cx, + src.as_ptr(), + src_offset, + std::ptr::null(), + 0usize, + dst.as_mut_ptr(), + dst_offset, + width, + &matrix, + transfer_function, + ) } } } diff --git 
a/src/xyz_target.rs b/src/xyz_target.rs index 63af39a..66a251c 100644 --- a/src/xyz_target.rs +++ b/src/xyz_target.rs @@ -1,5 +1,5 @@ #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub(crate) enum XyzTarget { +pub enum XyzTarget { LAB = 0, XYZ = 1, LUV = 2, diff --git a/src/xyza_laba_to_image.rs b/src/xyza_laba_to_image.rs index f0511ba..a49ec55 100644 --- a/src/xyza_laba_to_image.rs +++ b/src/xyza_laba_to_image.rs @@ -90,32 +90,15 @@ fn xyz_with_alpha_to_channels( - cx, - src.as_ptr(), - src_offset, - dst.as_mut_ptr(), - dst_offset, - width, - &matrix, - transfer_function, - ) - } - } - - #[cfg(all( - any(target_arch = "aarch64", target_arch = "arm"), - target_feature = "neon" - ))] - unsafe { - cx = neon_xyza_to_image::( + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.1" + ))] + unsafe { + if _has_sse { + cx = sse_xyza_to_image::( cx, src.as_ptr(), src_offset, @@ -128,6 +111,23 @@ fn xyz_with_alpha_to_channels( + cx, + src.as_ptr(), + src_offset, + dst.as_mut_ptr(), + dst_offset, + width, + &matrix, + transfer_function, + ) + } + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) };