5
5
* Licence : BSD-2
6
6
*/
7
7
8
+ #include <fenv.h>
8
9
#include <math.h>
9
10
#include <stdio.h>
10
11
#include <stdlib.h>
12
+ #include <string.h>
11
13
#include <sys/time.h>
12
14
#include <time.h>
13
15
#include "simd_utils.h"
@@ -6031,6 +6033,24 @@ printf("\n");
6031
6033
printf ("tan128d %d %lf\n" , len , elapsed );
6032
6034
6033
6035
l2_errd (inoutd_ref , inoutd2 , len );
6036
+
6037
+ #ifdef ICC
6038
+ clock_gettime (CLOCK_REALTIME , & start );
6039
+ tan128d_svml (inoutd , inoutd2 , len );
6040
+ clock_gettime (CLOCK_REALTIME , & stop );
6041
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
6042
+ printf ("tan128d_svml %d %lf\n" , len , elapsed );
6043
+
6044
+ clock_gettime (CLOCK_REALTIME , & start );
6045
+ for (l = 0 ; l < loop ; l ++ )
6046
+ tan128d_svml (inoutd , inoutd2 , len );
6047
+ clock_gettime (CLOCK_REALTIME , & stop );
6048
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
6049
+ printf ("tan128d_svml %d %lf\n" , len , elapsed );
6050
+
6051
+ l2_errd (inoutd_ref , inoutd2 , len );
6052
+ #endif
6053
+
6034
6054
#endif
6035
6055
6036
6056
#ifdef AVX
@@ -6047,9 +6067,27 @@ printf("\n");
6047
6067
elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
6048
6068
printf ("tan256d %d %lf\n" , len , elapsed );
6049
6069
6070
+ l2_errd (inoutd_ref , inoutd2 , len );
6071
+
6072
+ #ifdef ICC
6073
+ clock_gettime (CLOCK_REALTIME , & start );
6074
+ tan256d_svml (inoutd , inoutd2 , len );
6075
+ clock_gettime (CLOCK_REALTIME , & stop );
6076
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
6077
+ printf ("tan256d_svml %d %lf\n" , len , elapsed );
6078
+
6079
+ clock_gettime (CLOCK_REALTIME , & start );
6080
+ for (l = 0 ; l < loop ; l ++ )
6081
+ tan256d_svml (inoutd , inoutd2 , len );
6082
+ clock_gettime (CLOCK_REALTIME , & stop );
6083
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
6084
+ printf ("tan256d_svml %d %lf\n" , len , elapsed );
6085
+
6050
6086
l2_errd (inoutd_ref , inoutd2 , len );
6051
6087
#endif
6052
6088
6089
+ #endif
6090
+
6053
6091
#ifdef AVX512
6054
6092
clock_gettime (CLOCK_REALTIME , & start );
6055
6093
tan512d (inoutd , inoutd2 , len );
@@ -6066,6 +6104,24 @@ printf("\n");
6066
6104
6067
6105
l2_errd (inoutd_ref , inoutd2 , len );
6068
6106
// for(int i = 0; i < 512len; i++) printf("%lf %lf %lf \n",inoutd[i],inoutd_ref[i],inoutd2[i]);
6107
+
6108
+ #ifdef ICC
6109
+ clock_gettime (CLOCK_REALTIME , & start );
6110
+ tan512d_svml (inoutd , inoutd2 , len );
6111
+ clock_gettime (CLOCK_REALTIME , & stop );
6112
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
6113
+ printf ("tan512d_svml %d %lf\n" , len , elapsed );
6114
+
6115
+ clock_gettime (CLOCK_REALTIME , & start );
6116
+ for (l = 0 ; l < loop ; l ++ )
6117
+ tan512d_svml (inoutd , inoutd2 , len );
6118
+ clock_gettime (CLOCK_REALTIME , & stop );
6119
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
6120
+ printf ("tan512d_svml %d %lf\n" , len , elapsed );
6121
+
6122
+ l2_errd (inoutd_ref , inoutd2 , len );
6123
+ #endif
6124
+
6069
6125
#endif
6070
6126
6071
6127
printf ("\n" );
@@ -8118,21 +8174,6 @@ for (int i = 0; i < len; i++){
8118
8174
printf ("convertFloat32ToU8_128 %d %lf\n" , len , elapsed );
8119
8175
8120
8176
l2_err_u8 (inout_u1 , inout_u2 , len );
8121
-
8122
- clock_gettime (CLOCK_REALTIME , & start );
8123
- convertFloat32ToU8_128_ (inout , inout_u1 , len , RndZero , 4 );
8124
- clock_gettime (CLOCK_REALTIME , & stop );
8125
- elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
8126
- printf ("convertFloat32ToU8_128_ %d %lf\n" , len , elapsed );
8127
-
8128
- clock_gettime (CLOCK_REALTIME , & start );
8129
- for (l = 0 ; l < loop ; l ++ )
8130
- convertFloat32ToU8_128_ (inout , inout_u1 , len , RndZero , 4 );
8131
- clock_gettime (CLOCK_REALTIME , & stop );
8132
- elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
8133
- printf ("convertFloat32ToU8_128_ %d %lf\n" , len , elapsed );
8134
-
8135
- l2_err_u8 (inout_u1 , inout_u2 , len );
8136
8177
#endif
8137
8178
8138
8179
/*for(int i = 0; i < len; i++)
@@ -8226,24 +8267,6 @@ for (int i = 0; i < len; i++){
8226
8267
printf ("convertFloat32ToI16_128 %d %lf\n" , len , elapsed );
8227
8268
8228
8269
l2_err_i16 (inout_s1 , inout_s2 , len );
8229
-
8230
-
8231
- clock_gettime (CLOCK_REALTIME , & start );
8232
- convertFloat32ToI16_128_ (inout , inout_s2 , len , RndZero , 4 );
8233
- clock_gettime (CLOCK_REALTIME , & stop );
8234
- elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
8235
- printf ("convertFloat32ToI16_128_ %d %lf\n" , len , elapsed );
8236
-
8237
- clock_gettime (CLOCK_REALTIME , & start );
8238
- for (l = 0 ; l < loop ; l ++ )
8239
- convertFloat32ToI16_128_ (inout , inout_s2 , len , RndZero , 4 );
8240
- clock_gettime (CLOCK_REALTIME , & stop );
8241
- elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
8242
- printf ("convertFloat32ToI16_128_ %d %lf\n" , len , elapsed );
8243
-
8244
- l2_err_i16 (inout_s1 , inout_s2 , len );
8245
- /*for(int i=0; i < len; i++)
8246
- printf("%f %u %u\n",inout[i], (uint16_t)inout_s1[i], (uint16_t)inout_s2[i]);*/
8247
8270
#endif
8248
8271
8249
8272
#ifdef AVX
0 commit comments