Skip to content

Commit e57c726

Browse files
author
Guillaume Piolat
committed
Add support for _mm256_sign_epi8
1 parent 8f4951a commit e57c726

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

source/inteli/avx2intrin.d

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2731,7 +2731,45 @@ unittest
27312731
// TODO __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8) pure @safe
27322732
// TODO __m256i _mm256_sign_epi16 (__m256i a, __m256i b) pure @safe
27332733
// TODO __m256i _mm256_sign_epi32 (__m256i a, __m256i b) pure @safe
2734-
// TODO __m256i _mm256_sign_epi8 (__m256i a, __m256i b) pure @safe
2734+
2735+
2736+
/// Negate packed signed 8-bit integers in `a` when the corresponding signed 8-bit integer in `b` is negative.
2737+
/// Elements in result are zeroed out when the corresponding element in `b` is zero.
/// Elements of `a` are returned unchanged when the corresponding element in `b` is positive.
2738+
__m256i _mm256_sign_epi8 (__m256i a, __m256i b) pure @safe
2739+
{
2740+
// PERF DMD: there is no DMD-specific path; whenever neither GDC_with_AVX2 nor
// LDC_with_AVX2 holds, the 128-bit split fallback in the final `else` is used.
2741+
static if (GDC_with_AVX2)
2742+
{
2743+
// NOTE(review): this path casts to ubyte32 while the LDC path casts to byte32 for the
// same builtin name; presumably the builtin is declared with different element types
// by each compiler. Confirm before unifying the two branches.
return cast(__m256i) __builtin_ia32_psignb256(cast(ubyte32)a, cast(ubyte32)b);
2744+
}
2745+
else static if (LDC_with_AVX2)
2746+
{
2747+
return cast(__m256i) __builtin_ia32_psignb256(cast(byte32)a, cast(byte32)b);
2748+
}
2749+
else // split
2750+
{
2751+
// Fallback: run the 128-bit _mm_sign_epi8 on the low and high halves independently,
// then recombine them (note _mm256_set_m128i is called with the high half first).
// LDC arm64, 10 inst since LDC 1.32.1 -O1
2752+
__m128i a_lo = _mm256_extractf128_si256!0(a);
2753+
__m128i a_hi = _mm256_extractf128_si256!1(a);
2754+
__m128i b_lo = _mm256_extractf128_si256!0(b);
2755+
__m128i b_hi = _mm256_extractf128_si256!1(b);
2756+
__m128i r_lo = _mm_sign_epi8(a_lo, b_lo);
2757+
__m128i r_hi = _mm_sign_epi8(a_hi, b_hi);
2758+
return _mm256_set_m128i(r_hi, r_lo);
2759+
}
2760+
// PERF: not optimal in AVX without AVX2 (that configuration also takes the split path above)
2761+
}
2762+
unittest
2763+
{
2764+
// Checks _mm256_sign_epi8 lane by lane against a hand-written expected vector.
// A holds the values to be sign-adjusted; B holds the controlling signs.
// B mixes negative, zero and positive entries (including byte.min) in both 128-bit halves.
__m256i A = _mm256_setr_epi8( 1, 1, 1, 1, 1, 1, -2, 1, 0, 1, 0, 0, 0, 0, -2, 1,
2765+
-2, -1, 0, 1, 2, byte.min, byte.min, byte.min, -1, 0,-1, 1, -2, -50, 0, 50);
2766+
__m256i B = _mm256_setr_epi8(-1, 0,-1, 1, -2, -50, 0, 50, -1, 0,-1, 1, -2, -50, 0, 50,
2767+
-1, 0,-1, 1, -2, -50, 0, 50, -2, -1, 0, 1, 2, byte.min, byte.min, byte.min);
2768+
byte32 C = cast(byte32) _mm256_sign_epi8(A, B);
2769+
// Expected: negated where B < 0, zero where B == 0, unchanged where B > 0.
// Lane 21 (A = byte.min, B = -50) expects byte.min: negating byte.min wraps back to itself.
byte[32] correct = [ -1, 0,-1, 1, -1, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
2770+
2, 0, 0, 1, -2, byte.min, 0, byte.min, 1, 0, 0, 1, -2, 50, 0, -50];
2771+
assert(C.array == correct);
2772+
}
27352773

27362774
/// Shift packed 16-bit integers in `a` left by `count` while shifting in zeroes.
27372775
/// Bit-shift is a single value in the low-order 64-bit of `count`.

source/inteli/tmmintrin.d

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1248,7 +1248,7 @@ unittest
12481248

12491249
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit integer in `b` is negative.
12501250
/// Elements in result are zeroed out when the corresponding element in `b` is zero.
1251-
__m128i _mm_sign_epi8 (__m128i a, __m128i b) @trusted
1251+
__m128i _mm_sign_epi8 (__m128i a, __m128i b) pure @trusted
12521252
{
12531253
// PERF DMD
12541254
static if (GDC_with_SSSE3)

0 commit comments

Comments
 (0)