@@ -2731,7 +2731,45 @@ unittest
2731
2731
// TODO __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) pure @safe
2732
2732
// TODO __m256i _mm256_sign_epi16 (__m256i a, __m256i b) pure @safe
2733
2733
// TODO __m256i _mm256_sign_epi32 (__m256i a, __m256i b) pure @safe
2734
- // TODO __m256i _mm256_sign_epi8 (__m256i a, __m256i b) pure @safe
2734
+
2735
+
2736
+ /// Negate packed signed 8-bit integers in `a` when the corresponding signed 8-bit integer in `b` is negative.
2737
+ /// Elements in result are zeroed out when the corresponding element in `b` is zero, and kept as-is when it is positive.
2738
+ __m256i _mm256_sign_epi8 (__m256i a, __m256i b) pure @safe
2739
+ {
2740
+ // PERF DMD: no dedicated DMD branch below; presumably DMD falls through to the split path (TODO confirm)
2741
+ static if (GDC_with_AVX2) // the builtin is called with ubyte32 operands in this branch
2742
+ {
2743
+ return cast (__m256i) __builtin_ia32_psignb256(cast (ubyte32)a, cast (ubyte32)b);
2744
+ }
2745
+ else static if (LDC_with_AVX2) // the builtin is called with byte32 operands in this branch
2746
+ {
2747
+ return cast (__m256i) __builtin_ia32_psignb256(cast (byte32)a, cast (byte32)b);
2748
+ }
2749
+ else // split: apply the 128-bit _mm_sign_epi8 to each half of `a` and `b`, then recombine
2750
+ {
2751
+ // LDC arm64, 10 inst since LDC 1.32.1 -O1
2752
+ __m128i a_lo = _mm256_extractf128_si256! 0 (a);
2753
+ __m128i a_hi = _mm256_extractf128_si256! 1 (a);
2754
+ __m128i b_lo = _mm256_extractf128_si256! 0 (b);
2755
+ __m128i b_hi = _mm256_extractf128_si256! 1 (b);
2756
+ __m128i r_lo = _mm_sign_epi8(a_lo, b_lo);
2757
+ __m128i r_hi = _mm_sign_epi8(a_hi, b_hi);
2758
+ return _mm256_set_m128i (r_hi, r_lo); // note the argument order: high half first, low half second
2759
+ }
2760
+ // PERF: not optimal in AVX without AVX2
2761
+ }
2762
+ unittest // Checks _mm256_sign_epi8 lane by lane, with negative, zero and positive values in B.
2763
+ {
2764
+ __m256i A = _mm256_setr_epi8( 1 , 1 , 1 , 1 , 1 , 1 , - 2 , 1 , 0 , 1 , 0 , 0 , 0 , 0 , - 2 , 1 ,
2765
+ - 2 , - 1 , 0 , 1 , 2 , byte .min, byte .min, byte .min, - 1 , 0 ,- 1 , 1 , - 2 , - 50 , 0 , 50 );
2766
+ __m256i B = _mm256_setr_epi8(- 1 , 0 ,- 1 , 1 , - 2 , - 50 , 0 , 50 , - 1 , 0 ,- 1 , 1 , - 2 , - 50 , 0 , 50 ,
2767
+ - 1 , 0 ,- 1 , 1 , - 2 , - 50 , 0 , 50 , - 2 , - 1 , 0 , 1 , 2 , byte .min, byte .min, byte .min);
2768
+ byte32 C = cast (byte32) _mm256_sign_epi8(A, B);
2769
+ byte [32 ] correct = [ - 1 , 0 ,- 1 , 1 , - 1 , - 1 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 ,
2770
+ 2 , 0 , 0 , 1 , - 2 , byte .min, 0 , byte .min, 1 , 0 , 0 , 1 , - 2 , 50 , 0 , - 50 ]; // lane 21: byte.min in A with a negative B lane is expected to stay byte.min
2771
+ assert (C.array == correct);
2772
+ }
2735
2773
2736
2774
/// Shift packed 16-bit integers in `a` left by `count` while shifting in zeroes.
2737
2775
/// Bit-shift is a single value in the low-order 64 bits of `count`.
0 commit comments