@@ -460,7 +460,7 @@ static inline void vectorSlope128d(double *dst, int len, double offset, double s
 // in SSE, missing _mm_cvtepi64_pd, _mm_cvttpd_epi64
 // See : https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
 
-static inline v2sd _mm_cvtepi64_pd_custom(v2si x)
+static inline v2sd _mm_cvtepi64_pd_custom(v2sid x)
 {
 #if 0
     //Signed
@@ -473,7 +473,7 @@ static inline v2sd _mm_cvtepi64_pd_custom(v2si x)
 #endif
 }
 
-static inline v2si _mm_cvttpd_epi64_custom(v2sd x)
+static inline v2sid _mm_cvttpd_epi64_custom(v2sd x)
 {
     // Signed
 #if 0
@@ -495,7 +495,7 @@ static inline void sincos_pd(v2sd x, v2sd *s, v2sd *c)
 {
     v2sd xmm1, xmm2, xmm3 = _mm_setzero_pd(), sign_bit_sin, y;
 
-    v2si emm0, emm2, emm4;
+    v2sid emm0, emm2, emm4;
 
     sign_bit_sin = x;
     /* take the absolute value */
@@ -510,21 +510,21 @@ static inline void sincos_pd(v2sd x, v2sd *s, v2sd *c)
     /* store the integer part of y in emm2 */
     emm2 = _mm_cvttpd_epi64_custom(y);
     /* j=(j+1) & (~1) (see the cephes sources) */
-    emm2 = _mm_add_epi64(emm2, *(v2si *) _pi64_1);
+    emm2 = _mm_add_epi64(emm2, *(v2sid *) _pi64_1);
 
-    emm2 = _mm_and_si128(emm2, *(v2si *) _pi64_inv1);
+    emm2 = _mm_and_si128(emm2, *(v2sid *) _pi64_inv1);
     y = _mm_cvtepi64_pd_custom(emm2);
     emm4 = emm2;
 
     /* get the swap sign flag for the sine */
-    emm0 = _mm_and_si128(emm2, *(v2si *) _pi64_4);
+    emm0 = _mm_and_si128(emm2, *(v2sid *) _pi64_4);
     // print2i(emm0);
     emm0 = _mm_slli_epi64(emm0, 61);
     // print2i(emm0);
     v2sd swap_sign_bit_sin = _mm_castsi128_pd(emm0);
 
     /* get the polynom selection mask for the sine*/
-    emm2 = _mm_and_si128(emm2, *(v2si *) _pi64_2);
+    emm2 = _mm_and_si128(emm2, *(v2sid *) _pi64_2);
     // SSE3
     emm2 = _mm_cmpeq_epi64(emm2, _mm_setzero_si128());
     v2sd poly_mask = _mm_castsi128_pd(emm2);
@@ -535,8 +535,8 @@ static inline void sincos_pd(v2sd x, v2sd *s, v2sd *c)
     x = _mm_fmadd_pd_custom(y, *(v2sd *) _pd_minus_cephes_DP2, x);
     x = _mm_fmadd_pd_custom(y, *(v2sd *) _pd_minus_cephes_DP3, x);
 
-    emm4 = _mm_sub_epi64(emm4, *(v2si *) _pi64_2);
-    emm4 = _mm_andnot_si128(emm4, *(v2si *) _pi64_4);
+    emm4 = _mm_sub_epi64(emm4, *(v2sid *) _pi64_2);
+    emm4 = _mm_andnot_si128(emm4, *(v2sid *) _pi64_4);
     emm4 = _mm_slli_epi64(emm4, 61);
     v2sd sign_bit_cos = _mm_castsi128_pd(emm4);
 
@@ -977,7 +977,7 @@ static inline v2sd exp_pd(v2sd x)
 {
     v2sd tmp = _mm_setzero_pd(), fx;
 
-    v2si emm0;
+    v2sid emm0;
 
     v2sd one = *(v2sd *) _pd_1;
     v2sd two = *(v2sd *) _pd_2;
@@ -1015,7 +1015,7 @@ static inline v2sd exp_pd(v2sd x)
 
     /* build 2^n */
     emm0 = _mm_cvttpd_epi64_custom(fx);
-    emm0 = _mm_add_epi64(emm0, *(v2si *) _pi64_0x7f);
+    emm0 = _mm_add_epi64(emm0, *(v2sid *) _pi64_0x7f);
     emm0 = _mm_slli_epi64(emm0, 52);
     v2sd pow2n = _mm_castsi128_pd(emm0);
 
@@ -1025,7 +1025,7 @@ static inline v2sd exp_pd(v2sd x)
 
 static inline v2sd log_pd(v2sd x)
 {
-    v2si emm0;
+    v2sid emm0;
     v2sd one = *(v2sd *) _pd_1;
 
     v2sd invalid_mask = _mm_cmple_pd(x, _mm_setzero_pd());
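
Note on the two conversion helpers renamed above: the Stack Overflow answer linked near line 461 describes a magic-constant trick for int64 <-> double conversion with plain SSE2, since the native _mm_cvtepi64_pd / _mm_cvttpd_epi64 intrinsics are missing. The sketch below illustrates that trick under stated assumptions, not the library's actual function bodies (which this diff does not show): it assumes v2sd and v2sid are typedefs for __m128d and __m128i, uses hypothetical *_sketch names, is only exact for magnitudes below 2^51, and rounds to nearest rather than truncating.

#include <emmintrin.h> /* SSE2 */

typedef __m128d v2sd;  /* assumption: library typedef for two doubles */
typedef __m128i v2sid; /* assumption: library typedef for two int64   */

/* Magic constant 2^52 + 2^51: adding it to a double of magnitude < 2^51
   pushes the value's integer part into the low mantissa bits. */
#define MAGIC_DBL 6755399441055744.0 /* 2^52 + 2^51 */

/* int64 -> double sketch (hypothetical name), exact for |x| < 2^51. */
static inline v2sd cvtepi64_pd_sketch(v2sid x)
{
    v2sd magic = _mm_set1_pd(MAGIC_DBL);
    x = _mm_add_epi64(x, _mm_castpd_si128(magic)); /* integer add onto the magic bit pattern */
    return _mm_sub_pd(_mm_castsi128_pd(x), magic); /* subtract the magic back as a double */
}

/* double -> int64 sketch (hypothetical name), exact for |x| < 2^51.
   Note: rounds to nearest; a truncating variant needs a prior floor/round step. */
static inline v2sid cvtpd_epi64_sketch(v2sd x)
{
    v2sd magic = _mm_set1_pd(MAGIC_DBL);
    x = _mm_add_pd(x, magic); /* forces the integer value into the mantissa */
    return _mm_sub_epi64(_mm_castpd_si128(x), _mm_castpd_si128(magic));
}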