Skip to content

Commit

Permalink
Bugfixes in Jzazbz
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jul 22, 2024
1 parent 38b8048 commit a30ea28
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 65 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ jobs:
- run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu i686-unknown-linux-gnu powerpc-unknown-linux-gnu
- run: RUSTFLAGS="-C target-feature=+neon" cargo build --target aarch64-unknown-linux-gnu
- run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target i686-unknown-linux-gnu
- run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target i686-unknown-linux-gnu
- run: cargo build --target powerpc-unknown-linux-gnu
- run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target x86_64-unknown-linux-gnu
- run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target x86_64-unknown-linux-gnu
- name: Test release pipeline
run: cargo publish --dry-run
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["src/app"] }

[package]
name = "colorutils-rs"
version = "0.5.1"
version = "0.5.2"
edition = "2021"
description = "High performance utilities for color format handling and conversion."
readme = "README.md"
Expand Down
14 changes: 5 additions & 9 deletions src/app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,11 @@ fn main() {
let g = 127;
let b = 255;
let rgb = Rgb::<u8>::new(r, g, b);
// let jzazbz = Jzazbz::from_rgb(rgb, TransferFunction::Srgb);
// println!("Jzczhz {:?}", jzazbz);
// println!("Rgb {:?}", rgb);
// let restored = jzazbz.to_rgb(TransferFunction::Srgb);
// println!("Restored RGB {:?}", restored);
println!(
"Restored RGB {:?}",
Jzazbz::new(0.1f32, 0.0, -0.2f32).to_rgb(TransferFunction::Srgb)
);
let jzazbz = Jzazbz::from_rgb(rgb, TransferFunction::Srgb);
println!("Jzczhz {:?}", jzazbz);
println!("Rgb {:?}", rgb);
let restored = jzazbz.to_rgb(TransferFunction::Srgb);
println!("Restored RGB {:?}", restored);

let img = ImageReader::open("./assets/beach_horizon.jpg")
.unwrap()
Expand Down
11 changes: 8 additions & 3 deletions src/neon/image_to_jzazbz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::image_to_jzazbz::JzazbzTarget;
use crate::neon::get_neon_linear_transfer;
use crate::neon::math::{vcolorq_matrix_f32, vpowq_n_f32};
use crate::{load_u8_and_deinterleave, TransferFunction, SRGB_TO_XYZ_D65};
use erydanos::{vatan2q_f32, vhypotq_fast_f32, vmlafq_f32};
use erydanos::{vatan2q_f32, vhypotq_fast_f32, visnanq_f32, vmlafq_f32, vpowq_f32};
use std::arch::aarch64::*;

macro_rules! perceptual_quantizer {
Expand All @@ -18,8 +18,13 @@ macro_rules! perceptual_quantizer {
let xx = vpowq_n_f32(vmulq_n_f32($color, 1e-4), 0.1593017578125);
let jx = vmlafq_f32(vdupq_n_f32(18.8515625), xx, vdupq_n_f32(0.8359375));
let den_jx = vmlafq_f32(xx, vdupq_n_f32(18.6875), vdupq_n_f32(1.));
let rs = vpowq_n_f32(vdivq_f32(jx, den_jx), 134.034375);
vbslq_f32(flush_to_zero_mask, vdupq_n_f32(0.), rs)
let rs = vpowq_f32(vdivq_f32(jx, den_jx), vdupq_n_f32(134.034375));
let flush_nan_to_zero_mask = visnanq_f32(rs);
vbslq_f32(
vorrq_u32(flush_to_zero_mask, flush_nan_to_zero_mask),
vdupq_n_f32(0.),
rs,
)
}};
}

Expand Down
11 changes: 7 additions & 4 deletions src/neon/jzazbz_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

use std::arch::aarch64::*;

use erydanos::{vcosq_f32, vmlafq_f32, vsinq_f32};
use erydanos::{vcosq_f32, visnanq_f32, vmlafq_f32, vpowq_f32, vsinq_f32};

use crate::image::ImageConfiguration;
use crate::image_to_jzazbz::JzazbzTarget;
Expand All @@ -21,10 +21,13 @@ macro_rules! perceptual_quantizer_inverse {
let xx = vpowq_n_f32($color, 7.460772656268214e-03);
let num = vsubq_f32(vdupq_n_f32(0.8359375), xx);
let den = vmlafq_f32(xx, vdupq_n_f32(18.6875), vdupq_n_f32(-18.8515625));
let den_is_zero = vceqzq_f32(den);
let rs = vmulq_n_f32(vpowq_n_f32(vdivq_f32(num, den), 6.277394636015326), 1e4);
let rs = vmulq_n_f32(
vpowq_f32(vdivq_f32(num, den), vdupq_n_f32(6.277394636015326)),
1e4,
);
let flush_nan_mask = visnanq_f32(rs);
vbslq_f32(
vorrq_u32(flush_to_zero_mask, den_is_zero),
vorrq_u32(flush_to_zero_mask, flush_nan_mask),
vdupq_n_f32(0.),
rs,
)
Expand Down
11 changes: 10 additions & 1 deletion src/rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::euclidean::EuclideanDistance;
use crate::hsv::Hsv;
use crate::lab::Lab;
use crate::luv::Luv;
use crate::{Hsl, Jzazbz, LCh, Sigmoidal, TransferFunction, Xyz};
use crate::{Hsl, Jzazbz, LCh, Oklab, Sigmoidal, TransferFunction, Xyz};
use erydanos::Euclidean3DDistance;

#[derive(Debug, PartialOrd, PartialEq, Clone, Copy)]
Expand Down Expand Up @@ -98,6 +98,15 @@ impl Rgb<u8> {
)
}

/// Produces the *Oklab* representation of this RGB color.
///
/// This is a thin convenience wrapper that forwards a copy of `self`
/// to `Oklab::from_rgb`.
///
/// # Arguments
/// `transfer_function` - Transfer function to convert into linear colorspace and backwards
#[inline]
pub fn to_oklab(&self, transfer_function: TransferFunction) -> Oklab {
    // `Oklab::from_rgb` is given the color by value, so copy out of the reference first.
    let rgb = *self;
    Oklab::from_rgb(rgb, transfer_function)
}

/// Converts rgb to S-shaped sigmoidized components
#[inline]
pub fn to_sigmoidal(&self) -> Sigmoidal {
Expand Down
11 changes: 8 additions & 3 deletions src/sse/image_to_jzazbz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

use erydanos::{_mm_atan2_ps, _mm_hypot_fast_ps, _mm_mlaf_ps};
use erydanos::{_mm_atan2_ps, _mm_hypot_fast_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_pow_ps};

use crate::image::ImageConfiguration;
use crate::image_to_jzazbz::JzazbzTarget;
Expand All @@ -30,8 +30,13 @@ macro_rules! perceptual_quantizer {
let xx = _mm_pow_n_ps(_mm_mul_ps($color, _mm_set1_ps(1e-4)), 0.1593017578125);
let jx = _mm_mlaf_ps(_mm_set1_ps(18.8515625), xx, _mm_set1_ps(0.8359375));
let den_jx = _mm_mlaf_ps(xx, _mm_set1_ps(18.6875), _mm_set1_ps(1.));
let rs = _mm_pow_n_ps(_mm_div_ps(jx, den_jx), 134.034375);
_mm_select_ps(flush_to_zero_mask, zeros, rs)
let rs = _mm_pow_ps(_mm_div_ps(jx, den_jx), _mm_set1_ps(134.034375));
let flush_nan_to_zero_mask = _mm_isnan_ps(rs);
_mm_select_ps(
_mm_or_ps(flush_to_zero_mask, flush_nan_to_zero_mask),
zeros,
rs,
)
}};
}

Expand Down
12 changes: 8 additions & 4 deletions src/sse/jzazbz_to_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

use erydanos::{_mm_cos_ps, _mm_mlaf_ps, _mm_sin_ps};
use erydanos::{_mm_cos_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_pow_ps, _mm_sin_ps};

use crate::image::ImageConfiguration;
use crate::image_to_jzazbz::JzazbzTarget;
Expand All @@ -30,12 +30,16 @@ macro_rules! perceptual_quantizer_inverse {
let xx = _mm_pow_n_ps($color, 7.460772656268214e-03);
let num = _mm_sub_ps(_mm_set1_ps(0.8359375), xx);
let den = _mm_mlaf_ps(xx, _mm_set1_ps(18.6875), _mm_set1_ps(-18.8515625));
let den_is_zero = _mm_cmpeq_ps(den, zeros);
let rs = _mm_mul_ps(
_mm_pow_n_ps(_mm_div_ps(num, den), 6.277394636015326),
_mm_pow_ps(_mm_div_ps(num, den), _mm_set1_ps(6.277394636015326)),
_mm_set1_ps(1e4),
);
_mm_select_ps(_mm_or_ps(flush_to_zero_mask, den_is_zero), zeros, rs)
let flush_nan_to_zero_mask = _mm_isnan_ps(rs);
_mm_select_ps(
_mm_or_ps(flush_to_zero_mask, flush_nan_to_zero_mask),
zeros,
rs,
)
}};
}

Expand Down
3 changes: 1 addition & 2 deletions src/sse/routines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
* // Use of this source code is governed by a BSD-style
* // license that can be found in the LICENSE file.
*/
use crate::sse::{sse_interleave_rgb_epi16, sse_interleave_rgba_epi16};

#[macro_export]
macro_rules! load_u8_and_deinterleave {
Expand Down Expand Up @@ -152,4 +151,4 @@ macro_rules! store_and_interleave_v3_u16 {
_mm_storeu_si128($ptr.add(8) as *mut __m128i, rgba1);
_mm_storeu_si128($ptr.add(16) as *mut __m128i, rgba2);
}};
}
}
37 changes: 0 additions & 37 deletions src/sse/support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,6 @@ pub unsafe fn sse_interleave_rgba(
(rgba_0_lo, rgba_0_hi, rgba_1_lo, rgba_1_hi)
}

/// Transposes a 4x4 matrix of `f32` lanes held in four SSE registers.
///
/// Treating `r`, `g`, `b`, `a` as the rows of a matrix, the returned tuple
/// holds its columns: element `i` of the result is
/// `(r[i], g[i], b[i], a[i])`.
///
/// # Safety
/// Calls SSE intrinsics directly; the caller must ensure the executing CPU
/// supports them.
#[inline(always)]
pub unsafe fn sse_transpose_x4(
    r: __m128,
    g: __m128,
    b: __m128,
    a: __m128,
) -> (__m128, __m128, __m128, __m128) {
    // Pairwise interleave: rg_* = (r, g, r, g) and ba_* = (b, a, b, a)
    // for the low / high halves of the inputs.
    let rg_lo = _mm_unpacklo_ps(r, g);
    let ba_lo = _mm_unpacklo_ps(b, a);
    let rg_hi = _mm_unpackhi_ps(r, g);
    let ba_hi = _mm_unpackhi_ps(b, a);

    // Glue matching 64-bit halves together to form each output row.
    // `movelh` takes the low halves of both operands, `movehl(x, y)` takes
    // the high half of `y` followed by the high half of `x`.
    (
        _mm_movelh_ps(rg_lo, ba_lo),
        _mm_movehl_ps(ba_lo, rg_lo),
        _mm_movelh_ps(rg_hi, ba_hi),
        _mm_movehl_ps(ba_hi, rg_hi),
    )
}

#[inline(always)]
pub unsafe fn sse_interleave_ps_rgb(a: __m128, b: __m128, c: __m128) -> (__m128, __m128, __m128) {
const MASK_U0: i32 = shuffle(0, 0, 0, 0);
Expand Down Expand Up @@ -95,15 +75,6 @@ pub unsafe fn sse_interleave_ps_rgba(
(v0, v1, v2, v3)
}

/// Interleaves four channel registers with `sse_interleave_rgba` and writes
/// the four resulting vectors to consecutive 16-byte slots starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for writing 64 bytes. Unaligned stores are used, so
/// no particular alignment of `ptr` is needed.
#[inline(always)]
pub unsafe fn sse_store_rgba(ptr: *mut u8, r: __m128i, g: __m128i, b: __m128i, a: __m128i) {
    let interleaved = sse_interleave_rgba(r, g, b, a);
    // Step in whole vectors: one `__m128i` element is 16 bytes.
    let dst = ptr as *mut __m128i;
    _mm_storeu_si128(dst, interleaved.0);
    _mm_storeu_si128(dst.add(1), interleaved.1);
    _mm_storeu_si128(dst.add(2), interleaved.2);
    _mm_storeu_si128(dst.add(3), interleaved.3);
}

#[inline(always)]
pub unsafe fn sse_deinterleave_rgba(
rgba0: __m128i,
Expand Down Expand Up @@ -308,14 +279,6 @@ pub unsafe fn sse_deinterleave_rgb_epi16(
(a0, b0, c0)
}

/// Interleaves three channel registers with `sse_interleave_rgb` and writes
/// the three resulting vectors to consecutive 16-byte slots starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for writing 48 bytes. Unaligned stores are used, so
/// no particular alignment of `ptr` is needed.
#[inline(always)]
pub unsafe fn sse_store_rgb_u8(ptr: *mut u8, r: __m128i, g: __m128i, b: __m128i) {
    let interleaved = sse_interleave_rgb(r, g, b);
    // Step in whole vectors: one `__m128i` element is 16 bytes.
    let dst = ptr as *mut __m128i;
    _mm_storeu_si128(dst, interleaved.0);
    _mm_storeu_si128(dst.add(1), interleaved.1);
    _mm_storeu_si128(dst.add(2), interleaved.2);
}

#[inline(always)]
pub unsafe fn sse_deinterleave_rgba_ps(
t0: __m128,
Expand Down

0 comments on commit a30ea28

Please sign in to comment.