Big rework with speed improvements

awxkee · Oct 10, 2024 · fb21ab3 · fb21ab3
1 parent f3ee410
commit fb21ab3
Show file tree

Hide file tree

Showing 30 changed files with 1,151 additions and 1,727 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/src/app/src/main.rs b/src/app/src/main.rs
@@ -68,15 +68,14 @@ fn main() {
         lab_store.resize(width as usize * components * height as usize, 0.);
         let src_stride = width * components as u32;
         let start_time = Instant::now();
-        rgb_to_jzazbz(
+        bgr_to_lalphabeta(
             src_bytes,
             src_stride,
             &mut lab_store,
             store_stride as u32,
             width,
             height,
-            200.,
-            TransferFunction::Srgb,
+            TransferFunction::Pq,
         );
         let elapsed_time = start_time.elapsed();
         // Print the elapsed time in milliseconds
@@ -104,15 +103,14 @@ fn main() {
         // }
 
         let start_time = Instant::now();
-        jzazbz_to_rgb(
+        lalphabeta_to_bgr(
             &lab_store,
             store_stride as u32,
             &mut dst_slice,
             src_stride,
             width,
             height,
-            200.,
-            TransferFunction::Srgb,
+            TransferFunction::Pq,
         );
 
         let elapsed_time = start_time.elapsed();

diff --git a/src/avx/gamma_curves.rs b/src/avx/gamma_curves.rs
@@ -4,7 +4,7 @@
  * // Use of this source code is governed by a BSD-style
  * // license that can be found in the LICENSE file.
  */
-
+#![allow(dead_code)]
 use crate::avx::math::*;
 #[allow(unused_imports)]
 use crate::gamma_curves::TransferFunction;
@@ -148,28 +148,3 @@ pub unsafe fn avx2_gamma2p2_from_linear(linear: __m256) -> __m256 {
 pub unsafe fn avx2_gamma2p8_from_linear(linear: __m256) -> __m256 {
     avx2_pure_gamma(linear, 1f32 / 2.8f32)
 }
-
-#[inline(always)]
-pub unsafe fn perform_avx_gamma_transfer(transfer_function: TransferFunction, v: __m256) -> __m256 {
-    match transfer_function {
-        TransferFunction::Srgb => avx2_srgb_from_linear(v),
-        TransferFunction::Rec709 => avx2_rec709_from_linear(v),
-        TransferFunction::Gamma2p2 => avx2_gamma2p2_from_linear(v),
-        TransferFunction::Gamma2p8 => avx2_gamma2p8_from_linear(v),
-        TransferFunction::Smpte428 => avx2_smpte428_from_linear(v),
-    }
-}
-
-#[inline(always)]
-pub unsafe fn perform_avx2_linear_transfer(
-    transfer_function: TransferFunction,
-    v: __m256,
-) -> __m256 {
-    match transfer_function {
-        TransferFunction::Srgb => avx2_srgb_to_linear(v),
-        TransferFunction::Rec709 => avx2_rec709_to_linear(v),
-        TransferFunction::Gamma2p2 => avx2_gamma2p2_to_linear(v),
-        TransferFunction::Gamma2p8 => avx2_gamma2p8_to_linear(v),
-        TransferFunction::Smpte428 => avx2_smpte428_to_linear(v),
-    }
-}
diff --git a/src/gamma_curves.rs b/src/gamma_curves.rs
@@ -75,6 +75,110 @@ pub fn smpte428_from_linear(linear: f32) -> f32 {
     (0.91655527974030934f32 * linear.max(0.)).powf(POWER_VALUE)
 }
 
+#[inline]
+/// Decodes a SMPTE 240 gamma-encoded value to linear; the result always lies in `[0.0, 1.0]`.
+pub fn smpte240_to_linear(gamma: f32) -> f32 {
+    if gamma < 0.0 {
+        0.0 // negative input maps to 0
+    } else if gamma < 4.0 * 0.022821585529445 {
+        gamma / 4.0 // linear segment: undoes the `linear * 4.0` branch of `smpte240_from_linear`
+    } else if gamma < 1.0 {
+        f32::powf((gamma + 0.111572195921731) / 1.111572195921731, 1.0 / 0.45) // power segment
+    } else {
+        1.0 // input at or above 1.0 saturates
+    }
+}
+
+#[inline]
+/// Encodes a linear value with the SMPTE 240 curve; the result always lies in `[0.0, 1.0]`.
+pub fn smpte240_from_linear(linear: f32) -> f32 {
+    if linear < 0.0 {
+        0.0 // negative input maps to 0
+    } else if linear < 0.022821585529445 {
+        linear * 4.0 // linear segment for small values
+    } else if linear < 1.0 {
+        1.111572195921731 * f32::powf(linear, 0.45) - 0.111572195921731 // power segment
+    } else {
+        1.0 // input at or above 1.0 saturates
+    }
+}
+
+#[inline]
+/// Encodes a linear value with the Log100 curve: `1 + log10(linear) / 2`, with input capped at 1.0.
+pub fn log100_from_linear(linear: f32) -> f32 {
+    if linear <= 0.01f32 {
+        0. // everything at or below 0.01 collapses to 0
+    } else {
+        1. + linear.min(1.).log10() / 2.0 // the cap at 1.0 keeps the result at or below 1.0
+    }
+}
+
+#[inline]
+/// Decodes a Log100-encoded value to linear: `10^(2 * (gamma - 1))`, with input capped at 1.0.
+pub fn log100_to_linear(gamma: f32) -> f32 {
+    // `log100_from_linear` maps every linear value in [0, 0.01] to 0, so there is no unique inverse there; return the middle of that interval.
+    const MID_INTERVAL: f32 = 0.01 / 2.;
+    if gamma <= 0. {
+        MID_INTERVAL
+    } else {
+        10f32.powf(2. * (gamma.min(1.) - 1.))
+    }
+}
+
+#[inline]
+/// Decodes a Log100Sqrt10-encoded value to linear: `10^(2.5 * (gamma - 1))`, with input capped at 1.0.
+pub fn log100_sqrt10_to_linear(gamma: f32) -> f32 {
+    // `log100_sqrt10_from_linear` maps every linear value in [0, 0.00316227766] to 0, so there is no unique inverse there; return the middle of that interval.
+    const MID_INTERVAL: f32 = 0.00316227766 / 2.;
+    if gamma <= 0. {
+        MID_INTERVAL
+    } else {
+        10f32.powf(2.5 * (gamma.min(1.) - 1.))
+    }
+}
+
+#[inline]
+/// Encodes a linear value with the Log100Sqrt10 curve: `1 + log10(linear) / 2.5`, with input capped at 1.0.
+pub fn log100_sqrt10_from_linear(linear: f32) -> f32 {
+    if linear <= 0.00316227766 {
+        0.0 // everything at or below 0.00316227766 collapses to 0
+    } else {
+        1.0 + linear.min(1.).log10() / 2.5 // the cap at 1.0 keeps the result at or below 1.0
+    }
+}
+
+#[inline]
+/// Encodes a linear value with the Bt.1361 curve, which also accepts negative input; output lies in `[-0.25, 1.0]`.
+pub fn bt1361_from_linear(linear: f32) -> f32 {
+    if linear < -0.25 {
+        -0.25 // input below -0.25 saturates
+    } else if linear < 0.0 {
+        -0.27482420670236 * f32::powf(-4.0 * linear, 0.45) + 0.02482420670236 // NOTE(review): tends to about +0.0248 as `linear` approaches 0 from below, while the next branch gives 0 at 0 - confirm the intended breakpoint
+    } else if linear < 0.018053968510807 {
+        linear * 4.5 // linear segment for small non-negative values
+    } else if linear < 1.0 {
+        1.09929682680944 * f32::powf(linear, 0.45) - 0.09929682680944 // power segment
+    } else {
+        1.0 // input at or above 1.0 saturates
+    }
+}
+
+#[inline]
+/// Decodes a Bt.1361-encoded value to linear, branch by branch the algebraic inverse of `bt1361_from_linear`.
+pub fn bt1361_to_linear(gamma: f32) -> f32 {
+    if gamma < -0.25 {
+        -0.25 // input below -0.25 saturates
+    } else if gamma < 0.0 {
+        f32::powf((gamma - 0.02482420670236) / -0.27482420670236, 1.0 / 0.45) / -4.0 // negative power segment; the result is <= 0
+    } else if gamma < 4.5 * 0.018053968510807 {
+        gamma / 4.5 // linear segment: undoes `linear * 4.5`
+    } else if gamma < 1.0 {
+        f32::powf((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) // power segment
+    } else {
+        1.0 // input at or above 1.0 saturates
+    }
+}
+
 #[inline(always)]
 /// Pure gamma transfer function for gamma 2.2
 pub fn pure_gamma_function(x: f32, gamma: f32) -> f32 {
@@ -111,6 +215,80 @@ pub fn gamma2p8_to_linear(gamma: f32) -> f32 {
     pure_gamma_function(gamma, 2.8f32)
 }
 
+#[inline]
+/// Decodes a PQ-encoded value to linear, scaled so that SDR white (203 nits) is 1.0; non-positive input returns 0.0.
+pub fn pq_to_linear(gamma: f32) -> f32 {
+    if gamma > 0.0 {
+        let pow_gamma = f32::powf(gamma, 1.0 / 78.84375);
+        let num = (pow_gamma - 0.8359375).max(0.);
+        let den = (18.8515625 - 18.6875 * pow_gamma).max(f32::MIN_POSITIVE); // keep `den` strictly positive (`f32::MIN` is the most negative float and clamps nothing), so `num / den` can never be negative or 0/0
+        let linear = f32::powf(num / den, 1.0 / 0.1593017578125);
+        // Scale so that SDR white is 1.0 (extended SDR).
+        const PQ_MAX_NITS: f32 = 10000.;
+        const SDR_WHITE_NITS: f32 = 203.;
+        linear * PQ_MAX_NITS / SDR_WHITE_NITS
+    } else {
+        0.0
+    }
+}
+
+#[inline]
+/// Encodes a linear value (SDR white = 1.0) with the PQ curve; non-positive input returns 0.0.
+pub fn pq_from_linear(linear: f32) -> f32 {
+    const PQ_MAX_NITS: f32 = 10000.;
+    const SDR_WHITE_NITS: f32 = 203.;
+
+    if linear > 0.0 {
+        // Scale from extended SDR range to [0.0, 1.0]; anything brighter than PQ_MAX_NITS is clipped.
+        let linear = (linear * SDR_WHITE_NITS / PQ_MAX_NITS).clamp(0., 1.);
+        let pow_linear = f32::powf(linear, 0.1593017578125);
+        let num = 0.1640625 * pow_linear - 0.1640625;
+        let den = 1.0 + 18.6875 * pow_linear;
+        f32::powf(1.0 + num / den, 78.84375) // `1 + num / den` equals (0.8359375 + 18.8515625 * p) / (1 + 18.6875 * p) with p = pow_linear
+    } else {
+        0.0
+    }
+}
+
+#[inline]
+/// Decodes an HLG-encoded value to linear, scaled by 1000 / 203 so that SDR white is 1.0; negative input returns 0.0.
+pub fn hlg_to_linear(gamma: f32) -> f32 {
+    const SDR_WHITE_NITS: f32 = 203.;
+    const HLG_WHITE_NITS: f32 = 1000.;
+    if gamma < 0.0 {
+        return 0.0;
+    }
+    let linear;
+    if gamma <= 0.5 {
+        linear = f32::powf((gamma * gamma) * (1.0 / 3.0), 1.2); // square-law segment; the 1.2 exponent undoes the `1.0 / 1.2` step in `hlg_from_linear`
+    } else {
+        linear = f32::powf(
+            (f32::exp((gamma - 0.55991073) / 0.17883277) + 0.28466892) / 12.0, // exponential segment: inverse of the `ln` branch in `hlg_from_linear`
+            1.2,
+        );
+    }
+    // Scale so that SDR white is 1.0 (extended SDR).
+    linear * HLG_WHITE_NITS / SDR_WHITE_NITS
+}
+
+#[inline]
+/// Encodes a linear value (SDR white = 1.0) with the HLG curve; input is first scaled by 203 / 1000 and clamped to `[0.0, 1.0]`.
+pub fn hlg_from_linear(linear: f32) -> f32 {
+    const SDR_WHITE_NITS: f32 = 203.;
+    const HLG_WHITE_NITS: f32 = 1000.;
+    // Scale from extended SDR range to [0.0, 1.0].
+    let mut linear = (linear * (SDR_WHITE_NITS / HLG_WHITE_NITS)).clamp(0., 1.);
+    // Inverse OOTF followed by OETF; see Table 5 and Note 5i in ITU-R BT.2100-2, pages 7-8.
+    linear = f32::powf(linear, 1.0 / 1.2);
+    if linear < 0.0 {
+        0.0 // not reached for the clamped value above; acts only as a guard
+    } else if linear <= (1.0 / 12.0) {
+        f32::sqrt(3.0 * linear) // square-root segment
+    } else {
+        0.17883277 * f32::ln(12.0 * linear - 0.28466892) + 0.55991073 // logarithmic segment
+    }
+}
+
 #[repr(C)]
 #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
 /// Declares transfer function for transfer components into a linear colorspace and its inverse
@@ -119,12 +297,24 @@ pub enum TransferFunction {
     Srgb,
     /// Rec.709 Transfer function
     Rec709,
-    /// Pure gamma 2.2 Transfer function
+    /// Pure gamma 2.2 Transfer function, ITU-R 470M
     Gamma2p2,
-    /// Pure gamma 2.8 Transfer function
+    /// Pure gamma 2.8 Transfer function, ITU-R 470BG
     Gamma2p8,
     /// Smpte 428 Transfer function
     Smpte428,
+    /// Log100 Transfer function
+    Log100,
+    /// Log100Sqrt10 Transfer function
+    Log100Sqrt10,
+    /// Bt1361 Transfer function
+    Bt1361,
+    /// Smpte 240 Transfer function
+    Smpte240,
+    /// PQ Transfer function
+    Pq,
+    /// HLG (Hybrid log gamma) Transfer function
+    Hlg,
 }
 
 impl From<u8> for TransferFunction {
@@ -136,6 +326,12 @@ impl From<u8> for TransferFunction {
             2 => TransferFunction::Gamma2p2,
             3 => TransferFunction::Gamma2p8,
             4 => TransferFunction::Smpte428,
+            5 => TransferFunction::Log100,
+            6 => TransferFunction::Log100Sqrt10,
+            7 => TransferFunction::Bt1361,
+            8 => TransferFunction::Smpte240,
+            9 => TransferFunction::Pq,
+            10 => TransferFunction::Hlg,
             _ => TransferFunction::Srgb,
         }
     }
@@ -150,6 +346,12 @@ impl TransferFunction {
             TransferFunction::Gamma2p8 => gamma2p8_to_linear(v),
             TransferFunction::Gamma2p2 => gamma2p2_to_linear(v),
             TransferFunction::Smpte428 => smpte428_to_linear(v),
+            TransferFunction::Log100 => log100_to_linear(v),
+            TransferFunction::Log100Sqrt10 => log100_sqrt10_to_linear(v),
+            TransferFunction::Bt1361 => bt1361_to_linear(v),
+            TransferFunction::Smpte240 => smpte240_to_linear(v),
+            TransferFunction::Pq => pq_to_linear(v),
+            TransferFunction::Hlg => hlg_to_linear(v),
         }
     }
 
@@ -160,7 +362,13 @@ impl TransferFunction {
             TransferFunction::Rec709 => rec709_from_linear(v),
             TransferFunction::Gamma2p2 => gamma2p2_from_linear(v),
             TransferFunction::Gamma2p8 => gamma2p8_from_linear(v),
-            TransferFunction::Smpte428 => smpte428_to_linear(v),
+            TransferFunction::Smpte428 => smpte428_from_linear(v),
+            TransferFunction::Log100 => log100_from_linear(v),
+            TransferFunction::Log100Sqrt10 => log100_sqrt10_from_linear(v),
+            TransferFunction::Bt1361 => bt1361_from_linear(v),
+            TransferFunction::Smpte240 => smpte240_from_linear(v),
+            TransferFunction::Pq => pq_from_linear(v),
+            TransferFunction::Hlg => hlg_from_linear(v),
         }
     }
 }