Skip to content

Commit 993f652

Browse files
committed
improved ltabs, gtabs, convert u8/i16/u16 and convert128_32f64f
1 parent 8b1f5f3 commit 993f652

28 files changed

+784
-832
lines changed

.clang-format

100644100755
File mode changed.

LICENSE

100644100755
File mode changed.

README.md

100644100755
File mode changed.

TODO

100644100755
File mode changed.

avx512_mathfun.h

100644100755
File mode changed.

avx_mathfun.h

100644100755
File mode changed.

latencies.txt

100644100755
File mode changed.

mysincosf.h

100644100755
File mode changed.

neon_mathfun.h

100644100755
File mode changed.

simd_test.c

Lines changed: 56 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
* Licence : BSD-2
66
*/
77

8+
#include <fenv.h>
89
#include <math.h>
910
#include <stdio.h>
1011
#include <stdlib.h>
12+
#include <string.h>
1113
#include <sys/time.h>
1214
#include <time.h>
1315
#include "simd_utils.h"
@@ -6031,6 +6033,24 @@ printf("\n");
60316033
printf("tan128d %d %lf\n", len, elapsed);
60326034

60336035
l2_errd(inoutd_ref, inoutd2, len);
6036+
6037+
#ifdef ICC
6038+
clock_gettime(CLOCK_REALTIME, &start);
6039+
tan128d_svml(inoutd, inoutd2, len);
6040+
clock_gettime(CLOCK_REALTIME, &stop);
6041+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
6042+
printf("tan128d_svml %d %lf\n", len, elapsed);
6043+
6044+
clock_gettime(CLOCK_REALTIME, &start);
6045+
for (l = 0; l < loop; l++)
6046+
tan128d_svml(inoutd, inoutd2, len);
6047+
clock_gettime(CLOCK_REALTIME, &stop);
6048+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
6049+
printf("tan128d_svml %d %lf\n", len, elapsed);
6050+
6051+
l2_errd(inoutd_ref, inoutd2, len);
6052+
#endif
6053+
60346054
#endif
60356055

60366056
#ifdef AVX
@@ -6047,9 +6067,27 @@ printf("\n");
60476067
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
60486068
printf("tan256d %d %lf\n", len, elapsed);
60496069

6070+
l2_errd(inoutd_ref, inoutd2, len);
6071+
6072+
#ifdef ICC
6073+
clock_gettime(CLOCK_REALTIME, &start);
6074+
tan256d_svml(inoutd, inoutd2, len);
6075+
clock_gettime(CLOCK_REALTIME, &stop);
6076+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
6077+
printf("tan256d_svml %d %lf\n", len, elapsed);
6078+
6079+
clock_gettime(CLOCK_REALTIME, &start);
6080+
for (l = 0; l < loop; l++)
6081+
tan256d_svml(inoutd, inoutd2, len);
6082+
clock_gettime(CLOCK_REALTIME, &stop);
6083+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
6084+
printf("tan256d_svml %d %lf\n", len, elapsed);
6085+
60506086
l2_errd(inoutd_ref, inoutd2, len);
60516087
#endif
60526088

6089+
#endif
6090+
60536091
#ifdef AVX512
60546092
clock_gettime(CLOCK_REALTIME, &start);
60556093
tan512d(inoutd, inoutd2, len);
@@ -6066,6 +6104,24 @@ printf("\n");
60666104

60676105
l2_errd(inoutd_ref, inoutd2, len);
60686106
// for(int i = 0; i < 512len; i++) printf("%lf %lf %lf \n",inoutd[i],inoutd_ref[i],inoutd2[i]);
6107+
6108+
#ifdef ICC
6109+
clock_gettime(CLOCK_REALTIME, &start);
6110+
tan512d_svml(inoutd, inoutd2, len);
6111+
clock_gettime(CLOCK_REALTIME, &stop);
6112+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
6113+
printf("tan512d_svml %d %lf\n", len, elapsed);
6114+
6115+
clock_gettime(CLOCK_REALTIME, &start);
6116+
for (l = 0; l < loop; l++)
6117+
tan512d_svml(inoutd, inoutd2, len);
6118+
clock_gettime(CLOCK_REALTIME, &stop);
6119+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
6120+
printf("tan512d_svml %d %lf\n", len, elapsed);
6121+
6122+
l2_errd(inoutd_ref, inoutd2, len);
6123+
#endif
6124+
60696125
#endif
60706126

60716127
printf("\n");
@@ -8118,21 +8174,6 @@ for (int i = 0; i < len; i++){
81188174
printf("convertFloat32ToU8_128 %d %lf\n", len, elapsed);
81198175

81208176
l2_err_u8(inout_u1, inout_u2, len);
8121-
8122-
clock_gettime(CLOCK_REALTIME, &start);
8123-
convertFloat32ToU8_128_(inout, inout_u1, len, RndZero, 4);
8124-
clock_gettime(CLOCK_REALTIME, &stop);
8125-
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
8126-
printf("convertFloat32ToU8_128_ %d %lf\n", len, elapsed);
8127-
8128-
clock_gettime(CLOCK_REALTIME, &start);
8129-
for (l = 0; l < loop; l++)
8130-
convertFloat32ToU8_128_(inout, inout_u1, len, RndZero, 4);
8131-
clock_gettime(CLOCK_REALTIME, &stop);
8132-
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
8133-
printf("convertFloat32ToU8_128_ %d %lf\n", len, elapsed);
8134-
8135-
l2_err_u8(inout_u1, inout_u2, len);
81368177
#endif
81378178

81388179
/*for(int i = 0; i < len; i++)
@@ -8226,24 +8267,6 @@ for (int i = 0; i < len; i++){
82268267
printf("convertFloat32ToI16_128 %d %lf\n", len, elapsed);
82278268

82288269
l2_err_i16(inout_s1, inout_s2, len);
8229-
8230-
8231-
clock_gettime(CLOCK_REALTIME, &start);
8232-
convertFloat32ToI16_128_(inout, inout_s2, len, RndZero, 4);
8233-
clock_gettime(CLOCK_REALTIME, &stop);
8234-
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
8235-
printf("convertFloat32ToI16_128_ %d %lf\n", len, elapsed);
8236-
8237-
clock_gettime(CLOCK_REALTIME, &start);
8238-
for (l = 0; l < loop; l++)
8239-
convertFloat32ToI16_128_(inout, inout_s2, len, RndZero, 4);
8240-
clock_gettime(CLOCK_REALTIME, &stop);
8241-
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
8242-
printf("convertFloat32ToI16_128_ %d %lf\n", len, elapsed);
8243-
8244-
l2_err_i16(inout_s1, inout_s2, len);
8245-
/*for(int i=0; i < len; i++)
8246-
printf("%f %u %u\n",inout[i], (uint16_t)inout_s1[i], (uint16_t)inout_s2[i]);*/
82478270
#endif
82488271

82498272
#ifdef AVX

0 commit comments

Comments
 (0)