@@ -2895,7 +2895,45 @@ unittest
2895
2895
// TODO __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) pure @safe
2896
2896
// TODO __m128i _mm_sllv_epi64 (__m128i a, __m128i count) pure @safe
2897
2897
// TODO __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) pure @safe
2898
- // TODO __m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @safe
2898
+
2899
/// Arithmetic right shift of the packed 16-bit integers in `a` by `count`,
/// filling the vacated high bits with copies of each element's sign bit.
/// The shift amount is one single value, taken from the low-order 64-bit of `count`.
/// When that amount is greater than 15, every element becomes all sign bits.
/// Warning: prefer `_mm256_srai_epi16`, less of a trap.
__m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @trusted
{
    static if (GDC_or_LDC_with_AVX2)
    {
        // Builtin path, selected at compile time: the builtin takes short
        // vectors, so reinterpret both operands going in and the result coming out.
        short16 lanes = cast(short16) a;
        short8 amount = cast(short8) count;
        return cast(__m256i) __builtin_ia32_psraw256(lanes, amount);
    }
    else
    {
        // Fallback: apply the 128-bit shift to each half separately with the
        // same `count`, then put the two halves back together (high half first).
        __m128i upper = _mm_sra_epi16(_mm256_extractf128_si256!1(a), count);
        __m128i lower = _mm_sra_epi16(_mm256_extractf128_si256!0(a), count);
        return _mm256_set_m128i(upper, lower);
    }
}
2919
unittest
{
    // Input mixes small positive, small negative and most-negative values.
    __m256i input = _mm256_setr_epi16(4, -9, 11, -32768, 4, -8, 11, -32768,
                                      4, -9, 11, -32768, 4, -8, 11, -32768);

    // Case 1: an all-zero count must hand the input back unchanged.
    {
        __m128i none = _mm_setzero_si128();
        short[16] expected = (cast(short16) input).array;
        short16 got = cast(short16) _mm256_sra_epi16(input, none);
        assert(got.array == expected);
    }

    // Case 2: a count far above 15 leaves only sign bits,
    // i.e. 0 for non-negative elements and -1 for negative ones.
    {
        __m128i huge = _mm_set1_epi64x(0x8000_0000_0000_0000); // too large shift
        short[16] expected = [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1];
        short16 got = cast(short16) _mm256_sra_epi16(input, huge);
        assert(got.array == expected);
    }

    // Case 3: the expected values correspond to a shift by 2; the trailing
    // 4 and 5 in the count vector must have no influence on the result.
    {
        __m128i byTwo = _mm_setr_epi32(2, 0, 4, 5);
        short[16] expected = [1, -3, 2, -8192, 1, -2, 2, -8192,
                              1, -3, 2, -8192, 1, -2, 2, -8192];
        short16 got = cast(short16) _mm256_sra_epi16(input, byTwo);
        assert(got.array == expected);
    }
}
2936
+
2899
2937
// TODO __m256i _mm256_sra_epi32 (__m256i a, __m128i count) pure @safe
2900
2938
2901
2939
// / Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign bits.
0 commit comments