Skip to content

Commit eee97e2

Browse files
author
Guillaume Piolat
committed
Add support for _mm256_sra_epi16
1 parent 52906ba commit eee97e2

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

source/inteli/avx2intrin.d

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2895,7 +2895,45 @@ unittest
28952895
// TODO __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) pure @safe
28962896
// TODO __m128i _mm_sllv_epi64 (__m128i a, __m128i count) pure @safe
28972897
// TODO __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) pure @safe
2898-
// TODO __m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @safe
2898+
2899+
/// Arithmetic right shift of each packed 16-bit integer in `a` by `count`,
/// filling the vacated high bits with copies of the sign bit.
/// The shift amount is one single value, read from the low-order 64 bits of `count`.
/// When that amount exceeds 15, every lane becomes all sign bits.
/// Warning: prefer `_mm256_srai_epi16`, less of a trap.
__m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @trusted
{
    static if (GDC_or_LDC_with_AVX2)
    {
        // Hand both operands to the compiler builtin, reinterpreted as 16-bit lanes.
        short16 lanes  = cast(short16)a;
        short8  amount = cast(short8)count;
        return cast(__m256i) __builtin_ia32_psraw256(lanes, amount);
    }
    else
    {
        // No builtin available: shift each 128-bit half with the 128-bit
        // intrinsic, then glue the halves back together (high half first).
        return _mm256_set_m128i(
            _mm_sra_epi16(_mm256_extractf128_si256!1(a), count),
            _mm_sra_epi16(_mm256_extractf128_si256!0(a), count));
    }
}
2919+
unittest
{
    // Input: the same eight-lane pattern repeated twice, mixing positive
    // values, negative values and -32768.
    __m256i A = _mm256_setr_epi16(4, -9, 11, -32768, 4, -8, 11, -32768,
                                  4, -9, 11, -32768, 4, -8, 11, -32768);

    // A zero count must return the input unchanged.
    {
        __m128i noShift = _mm_setzero_si128();
        short[16] expected = (cast(short16)A).array;
        short16 got = cast(short16) _mm256_sra_epi16(A, noShift);
        assert(got.array == expected);
    }

    // A count far above 15 must leave only sign bits: 0 for non-negative
    // lanes, -1 for negative lanes.
    {
        __m128i hugeShift = _mm_set1_epi64x(0x8000_0000_0000_0000); // too large shift
        short[16] expected = [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1];
        short16 got = cast(short16) _mm256_sra_epi16(A, hugeShift);
        assert(got.array == expected);
    }

    // The low 64 bits of the count hold 2 while the upper 64 bits are non-zero;
    // the expected values are those of a shift by 2, so the upper bits must
    // not influence the result.
    {
        __m128i byTwo = _mm_setr_epi32(2, 0, 4, 5);
        short[16] expected = [1, -3, 2, -8192, 1, -2, 2, -8192, 1, -3, 2, -8192, 1, -2, 2, -8192];
        short16 got = cast(short16) _mm256_sra_epi16(A, byTwo);
        assert(got.array == expected);
    }
}
2936+
28992937
// TODO __m256i _mm256_sra_epi32 (__m256i a, __m128i count) pure @safe
29002938

29012939
/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign bits.

0 commit comments

Comments
 (0)