Skip to content

Commit c757480

Browse files
authored
[SYCL] Add sigmoid in intel math functions (#20738)
Signed-off-by: jinge90 <[email protected]>
1 parent d1478de commit c757480

File tree

9 files changed

+221
-0
lines changed

9 files changed

+221
-0
lines changed

libdevice/device_imf.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,5 +700,36 @@ template <typename Ty> static inline Ty __srhadd(Ty x, Ty y) {
700700
return __spirv_ocl_s_rhadd(x, y);
701701
#endif
702702
}
703+
704+
static inline float __fsigm(float x) {
705+
#if defined(__LIBDEVICE_HOST_IMPL__)
706+
return 1.0f / (1.0f + __builtin_expf(-x));
707+
#elif defined(__SPIR__) || defined(__SPIRV__)
708+
return 1.0f / (1.0f + __spirv_ocl_exp(-x));
709+
#endif
710+
}
711+
712+
static inline _iml_half __fsigm(_iml_half x) {
713+
#if defined(__LIBDEVICE_HOST_IMPL__)
714+
float tmp_x = __half2float(x.get_internal());
715+
float res = 1.0f / (1.0f + __builtin_expf(-tmp_x));
716+
return _iml_half(__float2half(res));
717+
#elif defined(__SPIR__) || defined(__SPIRV__)
718+
_iml_half_internal tmp_x = x.get_internal();
719+
float res_f = 1.0f / (1.0f + __half2float(__spirv_ocl_exp(-tmp_x)));
720+
return __float2half(res_f);
721+
#endif
722+
}
723+
724+
static inline uint16_t __fsigm(uint16_t x) {
725+
float tmp_x = __bfloat162float(x);
726+
float res;
727+
#if defined(__LIBDEVICE_HOST_IMPL__)
728+
res = 1.0f / (1.0f + __builtin_expf(-tmp_x));
729+
#elif defined(__SPIR__) || defined(__SPIRV__)
730+
res = 1.0f / (1.0f + __spirv_ocl_exp(-tmp_x));
731+
#endif
732+
return __float2bfloat16(res, __IML_RTE);
733+
}
703734
#endif // __LIBDEVICE_IMF_ENABLED__
704735
#endif // __LIBDEVICE_DEVICE_IMF_H__

libdevice/imf/imf_inline_bf16.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,9 @@ DEVICE_EXTERN_C_INLINE
7070
_iml_bf16_internal __devicelib_imf_truncbf16(_iml_bf16_internal a) {
7171
return __trunc(_iml_bf16(a)).get_internal();
7272
}
73+
74+
DEVICE_EXTERN_C_INLINE
75+
_iml_bf16_internal __devicelib_imf_fsigmbf16(_iml_bf16_internal x) {
76+
return __fsigm(x);
77+
}
7378
#endif

libdevice/imf/imf_inline_fp32.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,14 @@ DEVICE_EXTERN_C_INLINE float __devicelib_imf_invf(float a) { return 1.0f / a; }
130130
DEVICE_EXTERN_C_INLINE float __devicelib_imf_copysignf(float a, float b) {
131131
return __copysign(a, b);
132132
}
133+
134+
DEVICE_EXTERN_C_INLINE float __devicelib_imf_fsigmf(float x) {
135+
return __fsigm(x);
136+
}
137+
138+
DEVICE_EXTERN_C_INLINE
139+
_iml_half_internal __devicelib_imf_fsigmf16(_iml_half_internal x) {
140+
_iml_half hx(x);
141+
return __fsigm(hx).get_internal();
142+
}
133143
#endif /*__LIBDEVICE_IMF_ENABLED__*/

libdevice/imf_wrapper.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2344,4 +2344,21 @@ float __devicelib_imf_sqrtf_rz(float);
23442344

23452345
DEVICE_EXTERN_C_INLINE
23462346
float __imf_sqrtf_rz(float x) { return __devicelib_imf_sqrtf_rz(x); }
2347+
2348+
/// --------------------------------------------------------------------------
2349+
/// sigmoid(x) function
2350+
/// sigmoid(x) = 1 / (1 + exp(-x))
2351+
/// --------------------------------------------------------------------------
2352+
DEVICE_EXTERN_C_INLINE float __devicelib_imf_fsigmf(float x);
2353+
2354+
DEVICE_EXTERN_C_INLINE
2355+
_iml_half_internal __devicelib_imf_fsigmf16(_iml_half_internal x);
2356+
2357+
DEVICE_EXTERN_C_INLINE _iml_half_internal __imf_fsigmf16(_iml_half_internal x) {
2358+
return __devicelib_imf_fsigmf16(x);
2359+
}
2360+
2361+
DEVICE_EXTERN_C_INLINE float __imf_fsigmf(float x) {
2362+
return __devicelib_imf_fsigmf(x);
2363+
}
23472364
#endif // __LIBDEVICE_IMF_ENABLED__

libdevice/imf_wrapper_bf16.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,4 +571,12 @@ DEVICE_EXTERN_C_INLINE
571571
_iml_bf16_internal __imf_truncbf16(_iml_bf16_internal a) {
572572
return __devicelib_imf_truncbf16(a);
573573
}
574+
575+
DEVICE_EXTERN_C_INLINE
576+
_iml_bf16_internal __devicelib_imf_fsigmbf16(_iml_bf16_internal x);
577+
578+
DEVICE_EXTERN_C_INLINE
579+
_iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal x) {
580+
return __devicelib_imf_fsigmbf16(x);
581+
}
574582
#endif // __LIBDEVICE_IMF_ENABLED__

sycl/include/sycl/builtins.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float x);
117117
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float x);
118118
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float x);
119119
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float x);
120+
extern __DPCPP_SYCL_EXTERNAL float __imf_fsigmf(float x);
120121
extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_rd(float x, float y, float z);
121122
extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_rn(float x, float y, float z);
122123
extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_ru(float x, float y, float z);
@@ -233,6 +234,7 @@ extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_rsqrtf16(_Float16 x);
233234
extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_invf16(_Float16 x);
234235
extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fmaxf16(_Float16 x, _Float16 y);
235236
extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fminf16(_Float16 x, _Float16 y);
237+
extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fsigmf16(_Float16 x);
236238
extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_copysignf16(_Float16 x, _Float16 y);
237239
extern __DPCPP_SYCL_EXTERNAL float __imf_half2float(_Float16 x);
238240
extern __DPCPP_SYCL_EXTERNAL float __imf_bfloat162float(uint16_t x);
@@ -323,6 +325,7 @@ extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ceilbf16(uint16_t x);
323325
extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_truncbf16(uint16_t x);
324326
extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_copysignbf16(uint16_t x,
325327
uint16_t y);
328+
extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fsigmbf16(uint16_t x);
326329
extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_sqrtbf16(uint16_t x);
327330
extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_rsqrtbf16(uint16_t x);
328331
extern __DPCPP_SYCL_EXTERNAL double __imf_fma(double x, double y, double z);

sycl/include/sycl/ext/intel/math.hpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ using _iml_half_internal = _Float16;
2121
using _iml_half_internal = uint16_t;
2222
#endif
2323

24+
using _iml_bf16_internal = uint16_t;
25+
2426
#include <sycl/bit_cast.hpp>
2527
#include <sycl/builtins.hpp>
2628
#include <sycl/ext/intel/math/imf_fp_conversions.hpp>
@@ -43,6 +45,9 @@ _iml_half_internal __imf_ceilf16(_iml_half_internal);
4345
float __imf_floorf(float);
4446
double __imf_floor(double);
4547
_iml_half_internal __imf_floorf16(_iml_half_internal);
48+
float __imf_fsigmf(float);
49+
_iml_half_internal __imf_fsigmf16(_iml_half_internal);
50+
_iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal);
4651
float __imf_rintf(float);
4752
double __imf_rint(double);
4853
_iml_half_internal __imf_invf16(_iml_half_internal);
@@ -242,6 +247,33 @@ template <typename Tp>
242247
std::enable_if_t<std::is_same_v<Tp, double>, double> rcp64h(Tp x) {
243248
return __imf_rcp64h(x);
244249
}
250+
/// --------------------------------------------------------------------------
251+
/// sigmoid(x) function
252+
/// --------------------------------------------------------------------------
253+
extern "C" {
254+
_iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal x);
255+
_iml_half_internal __imf_fsigmf16(_iml_half_internal x);
256+
float __imf_fsigmf(float x);
257+
};
258+
259+
template <typename Tp>
260+
std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> sigmoid(Tp x) {
261+
_iml_half_internal xi = sycl::bit_cast<_iml_half_internal>(x);
262+
return sycl::bit_cast<sycl::half>(__imf_fsigmf16(xi));
263+
}
264+
265+
template <typename Tp>
266+
std::enable_if_t<std::is_same_v<Tp, sycl::ext::oneapi::bfloat16>,
267+
sycl::ext::oneapi::bfloat16>
268+
sigmoid(Tp x) {
269+
_iml_bf16_internal xi = sycl::bit_cast<_iml_bf16_internal>(x);
270+
return sycl::bit_cast<sycl::ext::oneapi::bfloat16>(__imf_fsigmbf16(xi));
271+
}
272+
273+
template <typename Tp>
274+
std::enable_if_t<std::is_same_v<Tp, float>, float> sigmoid(Tp x) {
275+
return __imf_fsigmf(x);
276+
}
245277

246278
} // namespace ext::intel::math
247279
} // namespace _V1

sycl/test-e2e/DeviceLib/imf/imf_utils.hpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <cassert>
33
#include <climits>
4+
#include <cmath>
45
#include <cstdint>
56
#include <initializer_list>
67
#include <iostream>
@@ -51,6 +52,58 @@ template <> class imf_utils_default_equ<uint64_t> {
5152
}
5253
};
5354

55+
template <class Ty> class imf_utils_fp_equ {
56+
public:
57+
bool operator()(Ty x, Ty y) { return x == y; }
58+
};
59+
60+
template <> class imf_utils_fp_equ<float> {
61+
public:
62+
bool operator()(float x, float y) {
63+
if ((__builtin_isinf_sign(x) * __builtin_isinf_sign(y)) == 1)
64+
return true;
65+
if (__builtin_isnan(x) || __builtin_isnan(y))
66+
return false;
67+
// Simple check for 2 fp32
68+
const float relative_eps = 1e-4f;
69+
return std::fabs(x - y) <
70+
relative_eps * std::fmax(std::fabs(x), std::fabs(y));
71+
}
72+
};
73+
74+
template <> class imf_utils_fp_equ<sycl::half> {
75+
public:
76+
bool operator()(sycl::half x, sycl::half y) {
77+
float xf = static_cast<float>(x);
78+
float yf = static_cast<float>(y);
79+
if ((__builtin_isinf_sign(xf) * __builtin_isinf_sign(yf)) == 1)
80+
return true;
81+
if (__builtin_isnan(xf) || __builtin_isnan(yf))
82+
return false;
83+
// Simple check for 2 fp16
84+
const float relative_eps = 1e-3f;
85+
return std::fabs(xf - yf) <
86+
relative_eps * std::fmax(std::fabs(xf), std::fabs(yf));
87+
}
88+
};
89+
90+
template <> class imf_utils_fp_equ<sycl::ext::oneapi::bfloat16> {
91+
public:
92+
bool operator()(sycl::ext::oneapi::bfloat16 x,
93+
sycl::ext::oneapi::bfloat16 y) {
94+
float xf = static_cast<float>(x);
95+
float yf = static_cast<float>(y);
96+
if ((__builtin_isinf_sign(xf) * __builtin_isinf_sign(yf)) == 1)
97+
return true;
98+
if (__builtin_isnan(xf) || __builtin_isnan(yf))
99+
return false;
100+
// Simple check for 2 bf16
101+
const float relative_eps = 1e-3f;
102+
return std::fabs(xf - yf) <
103+
relative_eps * std::fmax(std::fabs(xf), std::fabs(yf));
104+
}
105+
};
106+
54107
// Used to test half precision utils
55108
template <class InputTy, class OutputTy, class FuncTy,
56109
class EquTy = imf_utils_default_equ<OutputTy>>
@@ -72,6 +125,42 @@ void test_host(std::initializer_list<InputTy> Input,
72125
}
73126
}
74127

128+
template <class InputTy, class FuncTy, class EquTy = imf_utils_fp_equ<InputTy>>
129+
void test(sycl::queue &q, std::initializer_list<InputTy> Input, FuncTy Func,
130+
int Line = __builtin_LINE()) {
131+
auto Size = Input.size();
132+
std::vector<InputTy> HostRef(Size);
133+
for (size_t Idx = 0; Idx < Size; ++Idx) {
134+
HostRef[Idx] = Func(*(std::begin(Input) + Idx));
135+
}
136+
137+
sycl::buffer<InputTy> InBuf(Size);
138+
{
139+
sycl::host_accessor InAcc(InBuf, sycl::write_only);
140+
int i = 0;
141+
for (auto x : Input)
142+
InAcc[i++] = x;
143+
}
144+
145+
sycl::buffer<InputTy> OutBuf(Size);
146+
q.submit([&](sycl::handler &CGH) {
147+
sycl::accessor InAcc(InBuf, CGH, sycl::read_only);
148+
sycl::accessor OutAcc(OutBuf, CGH, sycl::write_only);
149+
CGH.parallel_for(Size,
150+
[=](sycl::id<1> Id) { OutAcc[Id] = Func(InAcc[Id]); });
151+
}).wait();
152+
153+
sycl::host_accessor Acc(OutBuf, sycl::read_only);
154+
for (size_t Idx = 0; Idx < Size; ++Idx) {
155+
if (EquTy()(HostRef[Idx], Acc[Idx]))
156+
continue;
157+
std::cout << "Mismatch at line " << Line << "[" << Idx << "]: " << Acc[Idx]
158+
<< " != " << HostRef[Idx] << ", input was "
159+
<< *(std::begin(Input) + Idx) << std::endl;
160+
assert(false);
161+
}
162+
}
163+
75164
template <class InputTy, class OutputTy, class FuncTy,
76165
class EquTy = imf_utils_default_equ<OutputTy>>
77166
void test(sycl::queue &q, std::initializer_list<InputTy> Input,
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %{build} -o %t1.out
2+
// RUN: %{run} %t1.out
3+
#include "imf_utils.hpp"
4+
#include <sycl/ext/intel/math.hpp>
5+
6+
namespace sycl_imf = sycl::ext::intel::math;
7+
8+
int main(int, char **) {
9+
sycl::queue device_queue(sycl::default_selector_v);
10+
std::initializer_list<float> input_vals = {
11+
-0x1.4p+3, -0x1p+3, -0x1.8p+2, -0x1p+1, -0x1.8p-1,
12+
-0x1p-1, -0x1p-2, 0x0p+0, 0x1p-2, 0x1p-1,
13+
0x1p+2, 0x1.8p+2, 0x1p+3, 0x1.4p+3, 0x1.8p+3};
14+
test(device_queue, input_vals, F(sycl_imf::sigmoid));
15+
16+
std::initializer_list<sycl::half> input_vals_fp16 = {
17+
-0x1p+3, -0x1.8p+2, -0x1p+1, -0x1.8p-1, -0x1p-1, -0x1p-2,
18+
0x0p+0, 0x1p-2, 0x1p-1, 0x1p+2, 0x1.8p+2, 0x1p+3};
19+
test(device_queue, input_vals_fp16, F(sycl_imf::sigmoid));
20+
21+
std::initializer_list<sycl::ext::oneapi::bfloat16> input_vals_bf16 = {
22+
-0x1p+3, -0x1.8p+2, -0x1p+1, -0x1.8p-1, -0x1p-1, -0x1p-2,
23+
0x0p+0, 0x1p-2, 0x1p-1, 0x1p+2, 0x1.8p+2, 0x1p+3};
24+
test(device_queue, input_vals_bf16, F(sycl_imf::sigmoid));
25+
return 0;
26+
}

0 commit comments

Comments
 (0)