diff --git a/src/app/src/main.rs b/src/app/src/main.rs
index 45d6510..bb51a19 100644
--- a/src/app/src/main.rs
+++ b/src/app/src/main.rs
@@ -1,7 +1,7 @@
 use std::time::Instant;
 
-use image::{EncodableLayout, GenericImageView};
 use image::io::Reader as ImageReader;
+use image::{EncodableLayout, GenericImageView};
 
 use colorutils_rs::*;
 
@@ -23,7 +23,7 @@ fn main() {
     println!("HSL {:?}", hsl);
     println!("Back RGB {:?}", hsl.to_rgb8());
 
-    let img = ImageReader::open("./assets/beach_horizon.jpg")
+    let img = ImageReader::open("./assets/asset.jpg")
         .unwrap()
         .decode()
         .unwrap();
@@ -100,7 +100,7 @@ fn main() {
         src_stride,
         width,
         height,
-        TransferFunction::Gamma2p8
+        TransferFunction::Gamma2p8,
     );
 
     let elapsed_time = start_time.elapsed();
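Note: `TransferFunction::Gamma2p8` in the hunk above is taken here to denote a pure 2.8 power curve. A minimal scalar sketch of that assumption (the helper names are illustrative only, not crate API):

```rust
// Assumed semantics of TransferFunction::Gamma2p8: a plain x^2.8 power curve.
// Hypothetical helpers, shown only to illustrate the transfer pair.
fn gamma2p8_to_linear(encoded: f32) -> f32 {
    // decode: gamma-encoded [0, 1] -> linear light
    encoded.max(0.0).powf(2.8)
}

fn gamma2p8_from_linear(linear: f32) -> f32 {
    // encode: linear light [0, 1] -> gamma-encoded
    linear.max(0.0).powf(1.0 / 2.8)
}
```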
diff --git a/src/avx/avx_color.rs b/src/avx/color.rs
similarity index 100%
rename from src/avx/avx_color.rs
rename to src/avx/color.rs
diff --git a/src/avx/avx_gamma_curves.rs b/src/avx/gamma_curves.rs
similarity index 99%
rename from src/avx/avx_gamma_curves.rs
rename to src/avx/gamma_curves.rs
index 143ac66..4fddce8 100644
--- a/src/avx/avx_gamma_curves.rs
+++ b/src/avx/gamma_curves.rs
@@ -1,4 +1,4 @@
-use crate::avx::avx_math::*;
+use crate::avx::math::*;
 #[allow(unused_imports)]
 use crate::gamma_curves::TransferFunction;
 #[cfg(target_arch = "x86")]
diff --git a/src/avx/linear_to_image.rs b/src/avx/linear_to_image.rs
new file mode 100644
index 0000000..ced82a5
--- /dev/null
+++ b/src/avx/linear_to_image.rs
@@ -0,0 +1,153 @@
+use crate::avx::gamma_curves::get_avx_gamma_transfer;
+use crate::avx::{
+    avx2_deinterleave_rgb_ps, avx2_deinterleave_rgba_ps, avx2_interleave_rgb,
+    avx2_interleave_rgba_epi8, avx2_pack_s32, avx2_pack_u16,
+};
+use crate::image::ImageConfiguration;
+use crate::TransferFunction;
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+#[inline(always)]
+unsafe fn gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
+    src: *const f32,
+    transfer_function: TransferFunction,
+) -> (__m256i, __m256i, __m256i, __m256i) {
+    let d_alpha = _mm256_set1_ps(1f32);
+    let transfer = get_avx_gamma_transfer(transfer_function);
+    let v_scale_alpha = _mm256_set1_ps(255f32);
+    let (mut r_f32, mut g_f32, mut b_f32, mut a_f32);
+    let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
+
+    let row0 = _mm256_loadu_ps(src);
+    let row1 = _mm256_loadu_ps(src.add(8));
+    let row2 = _mm256_loadu_ps(src.add(16));
+
+    match image_configuration {
+        ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
+            let row3 = _mm256_loadu_ps(src.add(24));
+            let (v0, v1, v2, v3) = avx2_deinterleave_rgba_ps(row0, row1, row2, row3);
+            if image_configuration == ImageConfiguration::Rgba {
+                r_f32 = v0;
+                g_f32 = v1;
+                b_f32 = v2;
+            } else {
+                r_f32 = v2;
+                g_f32 = v1;
+                b_f32 = v0;
+            }
+            a_f32 = v3;
+        }
+        ImageConfiguration::Bgr | ImageConfiguration::Rgb => {
+            let rgb_pixels = avx2_deinterleave_rgb_ps(row0, row1, row2);
+            if image_configuration == ImageConfiguration::Rgb {
+                r_f32 = rgb_pixels.0;
+                g_f32 = rgb_pixels.1;
+                b_f32 = rgb_pixels.2;
+            } else {
+                r_f32 = rgb_pixels.2;
+                g_f32 = rgb_pixels.1;
+                b_f32 = rgb_pixels.0;
+            }
+            a_f32 = d_alpha;
+        }
+    }
+
+    let zeros = _mm256_setzero_ps();
+    r_f32 = _mm256_max_ps(_mm256_min_ps(r_f32, d_alpha), zeros);
+    g_f32 = _mm256_max_ps(_mm256_min_ps(g_f32, d_alpha), zeros);
+    b_f32 = _mm256_max_ps(_mm256_min_ps(b_f32, d_alpha), zeros);
+
+    r_f32 = transfer(r_f32);
+    g_f32 = transfer(g_f32);
+    b_f32 = transfer(b_f32);
+    r_f32 = _mm256_mul_ps(r_f32, v_scale_alpha);
+    g_f32 = _mm256_mul_ps(g_f32, v_scale_alpha);
+    b_f32 = _mm256_mul_ps(b_f32, v_scale_alpha);
+    if USE_ALPHA {
+        a_f32 = _mm256_mul_ps(a_f32, v_scale_alpha);
+    }
+    (
+        _mm256_cvtps_epi32(_mm256_round_ps::<0>(r_f32)),
+        _mm256_cvtps_epi32(_mm256_round_ps::<0>(g_f32)),
+        _mm256_cvtps_epi32(_mm256_round_ps::<0>(b_f32)),
+        _mm256_cvtps_epi32(_mm256_round_ps::<0>(a_f32)),
+    )
+}
+
+#[inline(always)]
+pub unsafe fn avx_linear_to_gamma<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
+    start_cx: usize,
+    src: *const f32,
+    src_offset: u32,
+    dst: *mut u8,
+    dst_offset: u32,
+    width: u32,
+    transfer_function: TransferFunction,
+) -> usize {
+    let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
+    let channels = image_configuration.get_channels_count();
+    let mut cx = start_cx;
+
+    while cx + 32 < width as usize {
+        let offset_src_ptr =
+            ((src as *const u8).add(src_offset as usize) as *const f32).add(cx * channels);
+
+        let src_ptr_0 = offset_src_ptr;
+
+        let (r_row0_, g_row0_, b_row0_, a_row0_) =
+            gamma_vld::<CHANNELS_CONFIGURATION, USE_ALPHA>(src_ptr_0, transfer_function);
+
+        let src_ptr_1 = offset_src_ptr.add(8 * channels);
+
+        let (r_row1_, g_row1_, b_row1_, a_row1_) =
+            gamma_vld::<CHANNELS_CONFIGURATION, USE_ALPHA>(src_ptr_1, transfer_function);
+
+        let src_ptr_2 = offset_src_ptr.add(8 * 2 * channels);
+
+        let (r_row2_, g_row2_, b_row2_, a_row2_) =
+            gamma_vld::<CHANNELS_CONFIGURATION, USE_ALPHA>(src_ptr_2, transfer_function);
+
+        let src_ptr_3 = offset_src_ptr.add(8 * 3 * channels);
+
+        let (r_row3_, g_row3_, b_row3_, a_row3_) =
+            gamma_vld::<CHANNELS_CONFIGURATION, USE_ALPHA>(src_ptr_3, transfer_function);
+
+        let r_row01 = avx2_pack_s32(r_row0_, r_row1_);
+        let g_row01 = avx2_pack_s32(g_row0_, g_row1_);
+        let b_row01 = avx2_pack_s32(b_row0_, b_row1_);
+
+        let r_row23 = avx2_pack_s32(r_row2_, r_row3_);
+        let g_row23 = avx2_pack_s32(g_row2_, g_row3_);
+        let b_row23 = avx2_pack_s32(b_row2_, b_row3_);
+
+        let r_row = avx2_pack_u16(r_row01, r_row23);
+        let g_row = avx2_pack_u16(g_row01, g_row23);
+        let b_row = avx2_pack_u16(b_row01, b_row23);
+
+        let dst_ptr = dst.add(dst_offset as usize + cx * channels);
+
+        if USE_ALPHA {
+            let a_row01 = avx2_pack_s32(a_row0_, a_row1_);
+            let a_row23 = avx2_pack_s32(a_row2_, a_row3_);
+            let a_row = avx2_pack_u16(a_row01, a_row23);
+            let (rgba0, rgba1, rgba2, rgba3) =
+                avx2_interleave_rgba_epi8(r_row, g_row, b_row, a_row);
+            _mm256_storeu_si256(dst_ptr as *mut __m256i, rgba0);
+            _mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, rgba1);
+            _mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, rgba2);
+            _mm256_storeu_si256(dst_ptr.add(96) as *mut __m256i, rgba3);
+        } else {
+            let (rgb0, rgb1, rgb2) = avx2_interleave_rgb(r_row, g_row, b_row);
+            _mm256_storeu_si256(dst_ptr as *mut __m256i, rgb0);
+            _mm256_storeu_si256(dst_ptr.add(32) as *mut __m256i, rgb1);
+            _mm256_storeu_si256(dst_ptr.add(64) as *mut __m256i, rgb2);
+        }
+
+        cx += 32;
+    }
+
+    cx
+}
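`gamma_vld` above handles eight pixels per channel register: clamp the linear value to [0, 1], apply the gamma transfer, scale to 0..255, and round before the i32 conversion. A scalar model of one lane, assuming an sRGB-style curve (the real one is whatever `get_avx_gamma_transfer` selects; tie-handling of `f32::round` differs from the vector nearest-even mode only at exact .5):

```rust
// Scalar model of one lane of gamma_vld: clamp -> transfer -> scale -> round.
fn linear_lane_to_u8(linear: f32, transfer: fn(f32) -> f32) -> u8 {
    let clamped = linear.clamp(0.0, 1.0); // mirrors the _mm256_min_ps/_mm256_max_ps pair
    (transfer(clamped) * 255.0).round() as u8 // mirrors _mm256_round_ps + _mm256_cvtps_epi32
}

// Assumed example transfer: the sRGB OETF (linear -> gamma-encoded).
fn srgb_from_linear(c: f32) -> f32 {
    if c <= 0.0031308 {
        12.92 * c
    } else {
        1.055 * c.powf(1.0 / 2.4) - 0.055
    }
}
```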
diff --git a/src/avx/avx_math.rs b/src/avx/math.rs
similarity index 100%
rename from src/avx/avx_math.rs
rename to src/avx/math.rs
diff --git a/src/avx/mod.rs b/src/avx/mod.rs
index 6f9b286..3736abc 100644
--- a/src/avx/mod.rs
+++ b/src/avx/mod.rs
@@ -5,30 +5,22 @@
  * // license that can be found in the LICENSE file.
  */
 
-mod avx2_to_xyz_lab;
-
-mod avx2_utils;
-
-mod avx_color;
-
-mod avx_gamma_curves;
-
-mod avx_math;
-
-mod avx_support;
-
-mod avx_xyz_lab_to_image;
-
-mod avx_xyza_laba_to_image;
-
-pub use avx2_to_xyz_lab::*;
-
-pub use avx2_utils::*;
-
-pub use avx_math::*;
-
-pub use avx_support::*;
-
-pub use avx_xyz_lab_to_image::*;
-
-pub use avx_xyza_laba_to_image::*;
+mod to_xyz_lab;
+mod utils;
+mod color;
+mod gamma_curves;
+mod math;
+mod support;
+mod xyz_lab_to_image;
+mod linear_to_image;
+mod xyza_laba_to_image;
+mod to_linear;
+
+pub use linear_to_image::avx_linear_to_gamma;
+pub use math::*;
+pub use support::*;
+pub use to_xyz_lab::*;
+pub use utils::*;
+pub use xyz_lab_to_image::*;
+pub use xyza_laba_to_image::*;
+pub use to_linear::avx_channels_to_linear;
\ No newline at end of file
diff --git a/src/avx/avx_support.rs b/src/avx/support.rs
similarity index 100%
rename from src/avx/avx_support.rs
rename to src/avx/support.rs
diff --git a/src/avx/to_linear.rs b/src/avx/to_linear.rs
new file mode 100644
index 0000000..484edf6
--- /dev/null
+++ b/src/avx/to_linear.rs
@@ -0,0 +1,207 @@
+use crate::avx::gamma_curves::get_avx2_linear_transfer;
+use crate::avx::{
+    avx2_deinterleave_rgb_epi8, avx2_deinterleave_rgba_epi8, avx2_interleave_rgb_ps,
+    avx2_interleave_rgba_ps,
+};
+use crate::gamma_curves::TransferFunction;
+use crate::image::ImageConfiguration;
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+#[inline(always)]
+unsafe fn triple_to_linear(
+    r: __m256i,
+    g: __m256i,
+    b: __m256i,
+    transfer: &unsafe fn(__m256) -> __m256,
+) -> (__m256, __m256, __m256) {
+    let u8_scale = _mm256_set1_ps(1f32 / 255f32);
+    let r_f = _mm256_mul_ps(_mm256_cvtepi32_ps(r), u8_scale);
+    let g_f = _mm256_mul_ps(_mm256_cvtepi32_ps(g), u8_scale);
+    let b_f = _mm256_mul_ps(_mm256_cvtepi32_ps(b), u8_scale);
+    let r_linear = transfer(r_f);
+    let g_linear = transfer(g_f);
+    let b_linear = transfer(b_f);
+    (r_linear, g_linear, b_linear)
+}
+
+#[inline(always)]
+pub unsafe fn avx_channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
+    start_cx: usize,
+    src: *const u8,
+    src_offset: usize,
+    width: u32,
+    dst: *mut f32,
+    dst_offset: usize,
+    transfer_function: TransferFunction,
+) -> usize {
+    let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
+    let channels = image_configuration.get_channels_count();
+    let mut cx = start_cx;
+
+    let transfer = get_avx2_linear_transfer(transfer_function);
+
+    let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32;
+
+    while cx + 32 < width as usize {
+        let (r_chan, g_chan, b_chan, a_chan);
+        let src_ptr = src.add(src_offset + cx * channels);
+        let row1 = _mm256_loadu_si256(src_ptr as *const __m256i);
+        let row2 = _mm256_loadu_si256(src_ptr.add(32) as *const __m256i);
+        let row3 = _mm256_loadu_si256(src_ptr.add(64) as *const __m256i);
+        match image_configuration {
+            ImageConfiguration::Rgb | ImageConfiguration::Bgr => {
+                let (c1, c2, c3) = avx2_deinterleave_rgb_epi8(row1, row2, row3);
+                if image_configuration == ImageConfiguration::Rgb {
+                    r_chan = c1;
+                    g_chan = c2;
+                    b_chan = c3;
+                } else {
+                    r_chan = c3;
+                    g_chan = c2;
+                    b_chan = c1;
+                }
+                a_chan = _mm256_set1_epi8(-128);
+            }
+            ImageConfiguration::Rgba | ImageConfiguration::Bgra => {
+                let row4 = _mm256_loadu_si256(src_ptr.add(96) as *const __m256i);
+                let (c1, c2, c3, c4) = avx2_deinterleave_rgba_epi8(row1, row2, row3, row4);
+                if image_configuration == ImageConfiguration::Rgba {
+                    r_chan = c1;
+                    g_chan = c2;
+                    b_chan = c3;
+                    a_chan = c4;
+                } else {
+                    r_chan = c3;
+                    g_chan = c2;
+                    b_chan = c1;
+                    a_chan = c4;
+                }
+            }
+        }
+
+        let r_low = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(r_chan));
+        let g_low = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(g_chan));
+        let b_low = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(b_chan));
+
+        let r_low_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(r_low));
+        let g_low_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(g_low));
+        let b_low_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(b_low));
+
+        let (x_low_low, y_low_low, z_low_low) =
+            triple_to_linear(r_low_low, g_low_low, b_low_low, &transfer);
+
+        let a_low = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(a_chan));
+
+        let u8_scale = _mm256_set1_ps(1f32 / 255f32);
+
+        if USE_ALPHA {
+            let a_low_low = _mm256_mul_ps(
+                _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_castsi256_si128(a_low))),
+                u8_scale,
+            );
+
+            let (v0, v1, v2, v3) =
+                avx2_interleave_rgba_ps(x_low_low, y_low_low, z_low_low, a_low_low);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 16), v2);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 24), v3);
+        } else {
+            let (v0, v1, v2) = avx2_interleave_rgb_ps(x_low_low, y_low_low, z_low_low);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 16), v2);
+        }
+
+        let r_low_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(r_low));
+        let g_low_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(g_low));
+        let b_low_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(b_low));
+
+        let (x_low_high, y_low_high, z_low_high) =
+            triple_to_linear(r_low_high, g_low_high, b_low_high, &transfer);
+
+        if USE_ALPHA {
+            let a_low_high = _mm256_mul_ps(
+                _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256::<1>(a_low))),
+                u8_scale,
+            );
+
+            let (v0, v1, v2, v3) =
+                avx2_interleave_rgba_ps(x_low_high, y_low_high, z_low_high, a_low_high);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 32), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 32 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 32 + 16), v2);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 32 + 24), v3);
+        } else {
+            let (v0, v1, v2) = avx2_interleave_rgb_ps(x_low_high, y_low_high, z_low_high);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24 + 16), v2);
+        }
+
+        let r_high = _mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(r_chan));
+        let g_high = _mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(g_chan));
+        let b_high = _mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(b_chan));
+
+        let r_high_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(r_high));
+        let g_high_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(g_high));
+        let b_high_low = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(b_high));
+
+        let (x_high_low, y_high_low, z_high_low) =
+            triple_to_linear(r_high_low, g_high_low, b_high_low, &transfer);
+
+        let a_high = _mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(a_chan));
+
+        if USE_ALPHA {
+            let a_high_low = _mm256_mul_ps(
+                _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_castsi256_si128(a_high))),
+                u8_scale,
+            );
+
+            let (v0, v1, v2, v3) =
+                avx2_interleave_rgba_ps(x_high_low, y_high_low, z_high_low, a_high_low);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 64), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 64 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 64 + 16), v2);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 64 + 24), v3);
+        } else {
+            let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_low, y_high_low, z_high_low);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 48), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 48 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 48 + 16), v2);
+        }
+
+        let r_high_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(r_high));
+        let g_high_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(g_high));
+        let b_high_high = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(b_high));
+
+        let (x_high_high, y_high_high, z_high_high) =
+            triple_to_linear(r_high_high, g_high_high, b_high_high, &transfer);
+
+        if USE_ALPHA {
+            let a_high_high = _mm256_mul_ps(
+                _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_mm256_extracti128_si256::<1>(a_high))),
+                u8_scale,
+            );
+
+            let (v0, v1, v2, v3) =
+                avx2_interleave_rgba_ps(x_high_high, y_high_high, z_high_high, a_high_high);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 96), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 96 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 96 + 16), v2);
+            _mm256_storeu_ps(dst_ptr.add(cx * 4 + 96 + 24), v3);
+        } else {
+            let (v0, v1, v2) = avx2_interleave_rgb_ps(x_high_high, y_high_high, z_high_high);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24 * 3), v0);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24 * 3 + 8), v1);
+            _mm256_storeu_ps(dst_ptr.add(cx * 3 + 24 * 3 + 16), v2);
+        }
+
+        cx += 32;
+    }
+
+    cx
+}
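`avx_channels_to_linear` widens 32 interleaved u8 pixels through the `cvtepu8`/`cvtepu16` ladder, normalizes by 1/255, and decodes in four groups of eight. Per channel lane it is equivalent to this scalar sketch (the sRGB EOTF is an assumed example; the actual curve comes from `get_avx2_linear_transfer`):

```rust
// Scalar model of one channel lane of avx_channels_to_linear:
// widen u8 -> f32, normalize to [0, 1], then decode to linear light.
fn u8_lane_to_linear(v: u8, transfer: fn(f32) -> f32) -> f32 {
    transfer(v as f32 * (1.0 / 255.0))
}

// Assumed example transfer: the sRGB EOTF (gamma-encoded -> linear).
fn srgb_to_linear(c: f32) -> f32 {
    if c <= 0.04045 {
        c / 12.92
    } else {
        ((c + 0.055) / 1.055).powf(2.4)
    }
}
```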
diff --git a/src/image_to_linear.rs b/src/image_to_linear.rs
index 2fad1cc..3a8b889 100644
--- a/src/image_to_linear.rs
+++ b/src/image_to_linear.rs
@@ -1,5 +1,8 @@
-use std::slice;
-
+#[cfg(all(
+    any(target_arch = "x86_64", target_arch = "x86"),
+    target_feature = "avx2"
+))]
+use crate::avx::avx_channels_to_linear;
 use crate::gamma_curves::TransferFunction;
 use crate::image::ImageConfiguration;
 #[cfg(all(
@@ -13,6 +16,7 @@ use crate::neon::neon_channels_to_linear;
 ))]
 use crate::sse::*;
 use crate::Rgb;
+use std::slice;
 
 #[inline(always)]
 fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
@@ -52,9 +56,41 @@ fn channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
         _has_sse = true;
     }
 
+    #[cfg(all(
+        any(target_arch = "x86_64", target_arch = "x86"),
+        target_feature = "avx2"
+    ))]
+    let mut _has_avx2 = false;
+
+    #[cfg(all(
+        any(target_arch = "x86_64", target_arch = "x86"),
+        target_feature = "avx2"
+    ))]
+    if is_x86_feature_detected!("avx2") {
+        _has_avx2 = true;
+    }
+
     for _ in 0..height as usize {
         let mut _cx = 0usize;
 
+        #[cfg(all(
+            any(target_arch = "x86_64", target_arch = "x86"),
+            target_feature = "avx2"
+        ))]
+        unsafe {
+            if _has_avx2 {
+                _cx = avx_channels_to_linear::<CHANNELS_CONFIGURATION, USE_ALPHA>(
+                    _cx,
+                    src.as_ptr(),
+                    src_offset,
+                    width,
+                    dst.as_mut_ptr(),
+                    dst_offset,
+                    transfer_function,
+                )
+            }
+        }
+
     #[cfg(all(
         any(target_arch = "x86_64", target_arch = "x86"),
         target_feature = "sse4.1"
diff --git a/src/linear_to_image.rs b/src/linear_to_image.rs
index 9e43290..53c341e 100644
--- a/src/linear_to_image.rs
+++ b/src/linear_to_image.rs
@@ -1,5 +1,8 @@
-use std::slice;
-
+#[cfg(all(
+    any(target_arch = "x86_64", target_arch = "x86"),
+    target_feature = "avx2"
+))]
+use crate::avx::avx_linear_to_gamma;
 use crate::gamma_curves::TransferFunction;
 use crate::image::ImageConfiguration;
 #[cfg(all(
@@ -13,6 +16,7 @@ use crate::neon::neon_linear_to_gamma;
 ))]
 use crate::sse::sse_linear_to_gamma;
 use crate::Rgb;
+use std::slice;
 
 #[inline(always)]
 fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
@@ -52,9 +56,41 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
         _has_sse = true;
     }
 
+    #[cfg(all(
+        any(target_arch = "x86_64", target_arch = "x86"),
+        target_feature = "avx2"
+    ))]
+    let mut _has_avx2 = false;
+
+    #[cfg(all(
+        any(target_arch = "x86_64", target_arch = "x86"),
+        target_feature = "avx2"
+    ))]
+    if is_x86_feature_detected!("avx2") {
+        _has_avx2 = true;
+    }
+
     for _ in 0..height as usize {
         let mut _cx = 0usize;
 
+        #[cfg(all(
+            any(target_arch = "x86_64", target_arch = "x86"),
+            target_feature = "avx2"
+        ))]
+        unsafe {
+            if _has_avx2 {
+                _cx = avx_linear_to_gamma::<CHANNELS_CONFIGURATION, USE_ALPHA>(
+                    _cx,
+                    src.as_ptr(),
+                    src_offset as u32,
+                    dst.as_mut_ptr(),
+                    dst_offset as u32,
+                    width,
+                    transfer_function,
+                )
+            }
+        }
+
     #[cfg(all(
         any(target_arch = "x86_64", target_arch = "x86"),
         target_feature = "sse4.1"
@@ -113,7 +149,11 @@ fn linear_to_gamma_channels<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
-            let rgb = Rgb::<f32>::new(r, g, b);
+            let rgb = Rgb::<f32>::new(
+                r.min(1f32).max(0f32),
+                g.min(1f32).max(0f32),
+                b.min(1f32).max(0f32),
+            );
 
             unsafe {
                 *dst_slice.get_unchecked_mut(px) = (transfer(rgb.r) * 255f32) as u8;
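Both dispatch sites above follow the same pattern: a compile-time `#[cfg]` gate keeps AVX2 code out of builds without the target feature, and a runtime `is_x86_feature_detected!` check guards execution; each kernel returns the index of the first unprocessed pixel, so the next kernel or the scalar tail resumes from `_cx`. A condensed sketch with hypothetical kernel names:

```rust
// Condensed model of the dispatch added in image_to_linear.rs and
// linear_to_image.rs. avx2_kernel / sse41_kernel are hypothetical
// stand-ins for the real SIMD row kernels.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
fn process_row(width: usize) {
    let mut _cx = 0usize; // first pixel not yet handled by a SIMD kernel
    if is_x86_feature_detected!("avx2") {
        // _cx = avx2_kernel(_cx, /* ptrs, offsets, */ width); // 32 px per iteration
    }
    if is_x86_feature_detected!("sse4.1") {
        // _cx = sse41_kernel(_cx, /* ptrs, offsets, */ width); // picks up from _cx
    }
    for _px in _cx..width {
        // scalar tail: convert the pixels the vector kernels left over
    }
}
```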
diff --git a/src/neon/mod.rs b/src/neon/mod.rs
index a5d7bd0..2fca0d5 100644
--- a/src/neon/mod.rs
+++ b/src/neon/mod.rs
@@ -1,29 +1,29 @@
-mod from_sigmoidal;
 mod colors;
+mod from_sigmoidal;
 mod gamma_curves;
 mod hsv_to_image;
 mod image_to_hsv;
 mod linear_to_image;
 mod math;
+mod sigmoidal;
 mod to_linear;
 mod to_linear_u8;
+mod to_sigmoidal;
 mod to_xyz_lab;
 mod to_xyza_laba;
 mod xyz_lab_to_image;
 mod xyza_laba_to_image;
-mod sigmoidal;
-mod to_sigmoidal;
 
-pub use from_sigmoidal::neon_from_sigmoidal_row;
 pub use colors::*;
+pub use from_sigmoidal::neon_from_sigmoidal_row;
 pub use gamma_curves::*;
 pub use hsv_to_image::*;
 pub use image_to_hsv::*;
 pub use linear_to_image::*;
 pub use to_linear::*;
 pub use to_linear_u8::*;
+pub use to_sigmoidal::neon_image_to_sigmoidal;
 pub use to_xyz_lab::*;
 pub use to_xyza_laba::*;
 pub use xyz_lab_to_image::*;
 pub use xyza_laba_to_image::*;
-pub use to_sigmoidal::neon_image_to_sigmoidal;
diff --git a/src/sse/image_to_linear_u8.rs b/src/sse/image_to_linear_u8.rs
index 2e6608d..e506b64 100644
--- a/src/sse/image_to_linear_u8.rs
+++ b/src/sse/image_to_linear_u8.rs
@@ -1,11 +1,6 @@
 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 pub mod sse_image_to_linear_unsigned {
-    #[allow(unused_imports)]
-    use crate::gamma_curves::TransferFunction;
-    #[allow(unused_imports)]
     use crate::image::ImageConfiguration;
-    #[allow(unused_imports)]
-    use crate::image_to_xyz_lab::XyzTarget;
     use crate::sse::*;
     #[cfg(target_arch = "x86")]
     use std::arch::x86::*;
diff --git a/src/sse/linear_to_image.rs b/src/sse/linear_to_image.rs
index c248392..01beb23 100644
--- a/src/sse/linear_to_image.rs
+++ b/src/sse/linear_to_image.rs
@@ -1,15 +1,11 @@
-#[allow(unused_imports)]
 use crate::image::ImageConfiguration;
-#[allow(unused_imports)]
 use crate::sse::*;
-#[allow(unused_imports)]
 use crate::TransferFunction;
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 unsafe fn sse_gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
     src: *const f32,
@@ -55,6 +51,11 @@ unsafe fn sse_gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
         }
     }
 
+    let zeros = _mm_setzero_ps();
+    r_f32 = _mm_max_ps(_mm_min_ps(r_f32, d_alpha), zeros);
+    g_f32 = _mm_max_ps(_mm_min_ps(g_f32, d_alpha), zeros);
+    b_f32 = _mm_max_ps(_mm_min_ps(b_f32, d_alpha), zeros);
+
     r_f32 = transfer(r_f32);
     g_f32 = transfer(g_f32);
     b_f32 = transfer(b_f32);
@@ -64,15 +65,16 @@ unsafe fn sse_gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
     if USE_ALPHA {
         a_f32 = _mm_mul_ps(a_f32, v_scale_alpha);
     }
+    const ROUNDING_FLAGS: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
+
     (
-        _mm_cvtps_epi32(r_f32),
-        _mm_cvtps_epi32(g_f32),
-        _mm_cvtps_epi32(b_f32),
-        _mm_cvtps_epi32(a_f32),
+        _mm_cvtps_epi32(_mm_round_ps::<ROUNDING_FLAGS>(r_f32)),
+        _mm_cvtps_epi32(_mm_round_ps::<ROUNDING_FLAGS>(g_f32)),
+        _mm_cvtps_epi32(_mm_round_ps::<ROUNDING_FLAGS>(b_f32)),
+        _mm_cvtps_epi32(_mm_round_ps::<ROUNDING_FLAGS>(a_f32)),
     )
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 pub unsafe fn sse_linear_to_gamma<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
     start_cx: usize,
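The `ROUNDING_FLAGS` change above makes the float-to-int rounding explicit: `_mm_cvtps_epi32` rounds according to the current MXCSR mode (nearest-even by default), so wrapping the input in `_mm_round_ps::<_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC>` pins the behavior regardless of the FP environment, and the new clamp keeps out-of-range linear values from producing wild integers. A scalar analogue of the clamp-and-round step (transfer omitted; `f32::round` ties away from zero, unlike the nearest-even SSE mode, so results differ only at exact .5 ties):

```rust
// Scalar analogue of the clamped, explicitly rounded quantization in sse_gamma_vld.
fn quantize_to_u8(c: f32) -> u8 {
    // clamp to [0, 1] (the new _mm_min_ps/_mm_max_ps lines), then
    // scale, round to nearest, and narrow to a byte
    (c.clamp(0.0, 1.0) * 255.0).round() as u8
}
```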
diff --git a/src/sse/math.rs b/src/sse/math.rs
index 9866c4d..1b51179 100644
--- a/src/sse/math.rs
+++ b/src/sse/math.rs
@@ -150,16 +150,12 @@ pub unsafe fn _mm_exp_ps(x: __m128) -> __m128 {
     return poly;
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
-#[allow(dead_code)]
 pub unsafe fn _mm_pow_ps(x: __m128, n: __m128) -> __m128 {
     _mm_exp_ps(_mm_mul_ps(n, _mm_log_ps(x)))
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
-#[allow(dead_code)]
 pub unsafe fn _mm_pow_n_ps(x: __m128, n: f32) -> __m128 {
     _mm_exp_ps(_mm_mul_ps(_mm_set1_ps(n), _mm_log_ps(x)))
 }
diff --git a/src/sse/to_linear.rs b/src/sse/to_linear.rs
index 8ca9592..628d299 100644
--- a/src/sse/to_linear.rs
+++ b/src/sse/to_linear.rs
@@ -1,18 +1,11 @@
-#[allow(unused_imports)]
 use crate::gamma_curves::TransferFunction;
-#[allow(unused_imports)]
 use crate::image::ImageConfiguration;
-#[allow(unused_imports)]
-use crate::image_to_xyz_lab::XyzTarget;
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
-#[allow(unused_imports)]
 use crate::sse::*;
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 unsafe fn sse_triple_to_linear(
     r: __m128i,
@@ -30,7 +23,6 @@ unsafe fn sse_triple_to_linear(
     (r_linear, g_linear, b_linear)
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 pub unsafe fn sse_channels_to_linear<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
     start_cx: usize,
diff --git a/src/sse/to_xyz_lab.rs b/src/sse/to_xyz_lab.rs
index ab0e598..72e1064 100644
--- a/src/sse/to_xyz_lab.rs
+++ b/src/sse/to_xyz_lab.rs
@@ -1,9 +1,7 @@
 use crate::gamma_curves::TransferFunction;
 use crate::image::ImageConfiguration;
-#[allow(unused_imports)]
 use crate::image_to_xyz_lab::XyzTarget;
 use crate::luv::{LUV_CUTOFF_FORWARD_Y, LUV_MULTIPLIER_FORWARD_Y};
-#[allow(unused_imports)]
 use crate::sse::*;
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
@@ -40,7 +38,6 @@ pub(crate) unsafe fn sse_triple_to_xyz(
     (x, y, z)
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 pub(crate) unsafe fn sse_triple_to_luv(
     x: __m128,
@@ -71,7 +68,6 @@ pub(crate) unsafe fn sse_triple_to_luv(
     (l, u, v)
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 pub(crate) unsafe fn sse_triple_to_lab(
     x: __m128,
@@ -99,7 +95,6 @@ pub(crate) unsafe fn sse_triple_to_lab(
     (l, a, b)
 }
 
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 #[inline(always)]
 pub unsafe fn sse_channels_to_xyz_or_lab<
     const CHANNELS_CONFIGURATION: u8,