Skip to content

Commit

Permalink
Runtime dispatch, added Smpte428, rayon multithreading
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 8, 2024
1 parent f910394 commit 6ee5fa7
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/avx/gamma_curves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,30 @@ pub unsafe fn avx2_pure_gamma(x: __m256, value: f32) -> __m256 {
_mm256_select_ps(ones_mask, ones, rs)
}

/// Applies the SMPTE 428 encoding curve to eight linear `f32` lanes.
///
/// Every lane is first clamped so it is never below zero, then multiplied by
/// `0.91655527974030934` (numerically `48 / 52.37`) and finally raised to the
/// power `1 / 2.6`.
///
/// # Safety
///
/// Uses 256-bit `_mm256_*` intrinsics; the caller is presumably responsible
/// for making sure the running CPU supports them — confirm against the
/// dispatch code that selects this path.
#[inline(always)]
pub unsafe fn avx2_smpte428_from_linear(linear: __m256) -> __m256 {
    const LINEAR_SCALE: f32 = 0.91655527974030934f32;
    const INVERSE_GAMMA: f32 = 1.0f32 / 2.6f32;

    // Negative input is clamped to zero before the power function is applied.
    let non_negative = _mm256_max_ps(linear, _mm256_setzero_ps());
    let scaled = _mm256_mul_ps(non_negative, _mm256_set1_ps(LINEAR_SCALE));
    _mm256_pow_ps(scaled, _mm256_set1_ps(INVERSE_GAMMA))
}

/// Applies the SMPTE 428 decoding curve to eight gamma-encoded `f32` lanes.
///
/// Every lane is first clamped so it is never below zero, then raised to the
/// power `2.6` and finally multiplied by `1 / 0.91655527974030934`
/// (numerically `52.37 / 48`).
///
/// # Safety
///
/// Uses 256-bit `_mm256_*` intrinsics; the caller is presumably responsible
/// for making sure the running CPU supports them — confirm against the
/// dispatch code that selects this path.
#[inline(always)]
pub unsafe fn avx2_smpte428_to_linear(gamma: __m256) -> __m256 {
    const DECODE_GAMMA: f32 = 2.6f32;
    const SCALE: f32 = 1. / 0.91655527974030934f32;

    // Negative input is clamped to zero before the power function is applied.
    let non_negative = _mm256_max_ps(gamma, _mm256_setzero_ps());
    let powered = _mm256_pow_ps(non_negative, _mm256_set1_ps(DECODE_GAMMA));
    _mm256_mul_ps(powered, _mm256_set1_ps(SCALE))
}

#[inline(always)]
pub unsafe fn avx2_gamma2p2_to_linear(gamma: __m256) -> __m256 {
avx2_pure_gamma(gamma, 2.2f32)
Expand Down Expand Up @@ -132,6 +156,7 @@ pub unsafe fn perform_avx_gamma_transfer(transfer_function: TransferFunction, v:
TransferFunction::Rec709 => avx2_rec709_from_linear(v),
TransferFunction::Gamma2p2 => avx2_gamma2p2_from_linear(v),
TransferFunction::Gamma2p8 => avx2_gamma2p8_from_linear(v),
TransferFunction::Smpte428 => avx2_smpte428_from_linear(v),
}
}

Expand All @@ -145,5 +170,6 @@ pub unsafe fn perform_avx2_linear_transfer(
TransferFunction::Rec709 => avx2_rec709_to_linear(v),
TransferFunction::Gamma2p2 => avx2_gamma2p2_to_linear(v),
TransferFunction::Gamma2p8 => avx2_gamma2p8_to_linear(v),
TransferFunction::Smpte428 => avx2_smpte428_to_linear(v),
}
}
24 changes: 24 additions & 0 deletions src/sse/gamma_curves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

use crate::gamma_curves::TransferFunction;
use crate::sse::*;
use erydanos::_mm_pow_ps;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -103,6 +104,27 @@ pub unsafe fn sse_pure_gamma(gamma: __m128, value: f32) -> __m128 {
_mm_select_ps(ones_mask, ones, rs)
}

/// Applies the SMPTE 428 encoding curve to four linear `f32` lanes.
///
/// Every lane is first clamped so it is never below zero, then multiplied by
/// `0.91655527974030934` (numerically `48 / 52.37`) and finally raised to the
/// power `1 / 2.6`.
///
/// # Safety
///
/// Uses 128-bit `_mm_*` intrinsics; the caller is presumably responsible for
/// making sure the running CPU supports them — confirm against the dispatch
/// code that selects this path.
#[inline(always)]
pub unsafe fn sse_smpte428_from_linear(linear: __m128) -> __m128 {
    const LINEAR_SCALE: f32 = 0.91655527974030934f32;
    const INVERSE_GAMMA: f32 = 1.0f32 / 2.6f32;

    // Negative input is clamped to zero before the power function is applied.
    let non_negative = _mm_max_ps(linear, _mm_setzero_ps());
    let scaled = _mm_mul_ps(non_negative, _mm_set1_ps(LINEAR_SCALE));
    _mm_pow_ps(scaled, _mm_set1_ps(INVERSE_GAMMA))
}

/// Applies the SMPTE 428 decoding curve to four gamma-encoded `f32` lanes.
///
/// Every lane is first clamped so it is never below zero, then raised to the
/// power `2.6` and finally multiplied by `1 / 0.91655527974030934`
/// (numerically `52.37 / 48`).
///
/// # Safety
///
/// Uses 128-bit `_mm_*` intrinsics; the caller is presumably responsible for
/// making sure the running CPU supports them — confirm against the dispatch
/// code that selects this path.
#[inline(always)]
pub unsafe fn sse_smpte428_to_linear(gamma: __m128) -> __m128 {
    const DECODE_GAMMA: f32 = 2.6f32;
    const SCALE: f32 = 1. / 0.91655527974030934f32;

    // Negative input is clamped to zero before the power function is applied.
    let non_negative = _mm_max_ps(gamma, _mm_setzero_ps());
    let powered = _mm_pow_ps(non_negative, _mm_set1_ps(DECODE_GAMMA));
    _mm_mul_ps(powered, _mm_set1_ps(SCALE))
}

#[inline(always)]
pub unsafe fn sse_gamma2p2_to_linear(gamma: __m128) -> __m128 {
sse_pure_gamma(gamma, 2.2f32)
Expand Down Expand Up @@ -133,6 +155,7 @@ pub unsafe fn perform_sse_linear_transfer(
TransferFunction::Rec709 => sse_rec709_to_linear(v),
TransferFunction::Gamma2p2 => sse_gamma2p2_to_linear(v),
TransferFunction::Gamma2p8 => sse_gamma2p8_to_linear(v),
// Gamma-encoded -> linear, like every other arm of this match (`*_to_linear`).
TransferFunction::Smpte428 => sse_smpte428_to_linear(v),
}
}

Expand All @@ -143,5 +166,6 @@ pub unsafe fn perform_sse_gamma_transfer(transfer_function: TransferFunction, v:
TransferFunction::Rec709 => sse_rec709_from_linear(v),
TransferFunction::Gamma2p2 => sse_gamma2p2_from_linear(v),
TransferFunction::Gamma2p8 => sse_gamma2p8_from_linear(v),
// Linear -> gamma-encoded, like every other arm of this match (`*_from_linear`).
TransferFunction::Smpte428 => sse_smpte428_from_linear(v),
}
}

0 comments on commit 6ee5fa7

Please sign in to comment.