Skip to content

Commit 8edb62e

Browse files
sndmitrievkbobrovs
andauthored
[SYCL][ESIMD] Change arguments of some APIs to be template parameters (#5961)
* [SYCL][ESIMD] Change arguments of some APIs to be template parameters This is needed to force them to be compile-time constants which is required by low-level intrinsics. Signed-off-by: Sergey Dmitriev <[email protected]> Co-authored-by: Konstantin S Bobrovsky <[email protected]>
1 parent 60d62b0 commit 8edb62e

File tree

7 files changed

+68
-34
lines changed

7 files changed

+68
-34
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,10 @@ class ESIMDIntrinDescTable {
336336
{"svm_block_ld_unaligned", {"svm.block.ld.unaligned", {l(0)}}},
337337
{"svm_block_ld", {"svm.block.ld", {l(0)}}},
338338
{"svm_block_st", {"svm.block.st", {l(1)}}},
339-
{"svm_gather", {"svm.gather", {ai1(2), a(1), a(0), u(-1)}}},
339+
{"svm_gather", {"svm.gather", {ai1(1), t(3), a(0), u(-1)}}},
340340
{"svm_gather4_scaled",
341341
{"svm.gather4.scaled", {ai1(1), t(2), c16(0), c64(0), a(0), u(-1)}}},
342-
{"svm_scatter", {"svm.scatter", {ai1(3), a(2), a(0), a(1)}}},
342+
{"svm_scatter", {"svm.scatter", {ai1(2), t(3), a(0), a(1)}}},
343343
{"svm_scatter4_scaled",
344344
{"svm.scatter4.scaled", {ai1(2), t(2), c16(0), c64(0), a(0), a(1)}}},
345345

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,11 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
8181
} // __SYCL_INLINE_NAMESPACE(cl)
8282

8383
// flat_read does flat-address gather
84-
template <typename Ty, int N, int NumBlk = 0>
84+
template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
8585
__ESIMD_INTRIN
8686
__ESIMD_DNS::vector_type_t<Ty,
8787
N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
8888
__esimd_svm_gather(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
89-
int ElemsPerAddr = NumBlk,
9089
__ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
9190
#ifdef __SYCL_DEVICE_ONLY__
9291
;
@@ -95,18 +94,18 @@ __ESIMD_INTRIN
9594
auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
9695
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
9796
V = 0;
98-
ElemsPerAddr = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
97+
auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
9998
if (sizeof(Ty) == 2)
100-
ElemsPerAddr = ElemsPerAddr / 2;
99+
ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;
101100

102101
for (int I = 0; I < N; I++) {
103102
if (pred[I]) {
104103
Ty *Addr = reinterpret_cast<Ty *>(addrs[I]);
105104
if (sizeof(Ty) <= 2) {
106-
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++)
105+
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
107106
V[I * NumBlkDecoded + J] = *(Addr + J);
108107
} else {
109-
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++)
108+
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
110109
V[J * N + I] = *(Addr + J);
111110
}
112111
}
@@ -116,30 +115,30 @@ __ESIMD_INTRIN
116115
#endif // __SYCL_DEVICE_ONLY__
117116

118117
// flat_write does flat-address scatter
119-
template <typename Ty, int N, int NumBlk = 0>
118+
template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
120119
__ESIMD_INTRIN void __esimd_svm_scatter(
121120
__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
122121
__ESIMD_DNS::vector_type_t<Ty,
123122
N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
124123
vals,
125-
int ElemsPerAddr = NumBlk, __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
124+
__ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
126125
#ifdef __SYCL_DEVICE_ONLY__
127126
;
128127
#else
129128
{
130129
auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
131-
ElemsPerAddr = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
130+
auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
132131
if (sizeof(Ty) == 2)
133-
ElemsPerAddr = ElemsPerAddr / 2;
132+
ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;
134133

135134
for (int I = 0; I < N; I++) {
136135
if (pred[I]) {
137136
Ty *Addr = reinterpret_cast<Ty *>(addrs[I]);
138137
if (sizeof(Ty) <= 2) {
139-
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++)
138+
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
140139
*(Addr + J) = vals[I * NumBlkDecoded + J];
141140
} else {
142-
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++)
141+
for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
143142
*(Addr + J) = vals[J * N + I];
144143
}
145144
}

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,19 @@ gather(const Tx *p, simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
134134
addrs = addrs + offsets_i;
135135

136136
if constexpr (sizeof(T) == 1) {
137-
auto Ret = __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<4>()>(
138-
addrs.data(), detail::ElemsPerAddrEncoding<1>(), mask.data());
137+
auto Ret = __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<4>(),
138+
detail::ElemsPerAddrEncoding<1>()>(
139+
addrs.data(), mask.data());
139140
return __esimd_rdregion<T, N * 4, N, /*VS*/ 0, N, 4>(Ret, 0);
140141
} else if constexpr (sizeof(T) == 2) {
141-
auto Ret = __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<2>()>(
142-
addrs.data(), detail::ElemsPerAddrEncoding<2>(), mask.data());
142+
auto Ret = __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<2>(),
143+
detail::ElemsPerAddrEncoding<2>()>(
144+
addrs.data(), mask.data());
143145
return __esimd_rdregion<T, N * 2, N, /*VS*/ 0, N, 2>(Ret, 0);
144146
} else
145-
return __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<1>()>(
146-
addrs.data(), detail::ElemsPerAddrEncoding<1>(), mask.data());
147+
return __esimd_svm_gather<T, N, detail::ElemsPerAddrEncoding<1>(),
148+
detail::ElemsPerAddrEncoding<1>()>(addrs.data(),
149+
mask.data());
147150
}
148151

149152
/// Writes ("scatters") elements of the input vector to different memory
@@ -169,17 +172,19 @@ scatter(Tx *p, simd<uint32_t, N> offsets, simd<Tx, N> vals,
169172
if constexpr (sizeof(T) == 1) {
170173
simd<T, N * 4> D;
171174
D = __esimd_wrregion<T, N * 4, N, /*VS*/ 0, N, 4>(D.data(), vals.data(), 0);
172-
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<4>()>(
173-
addrs.data(), D.data(), detail::ElemsPerAddrEncoding<1>(), mask.data());
175+
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<4>(),
176+
detail::ElemsPerAddrEncoding<1>()>(
177+
addrs.data(), D.data(), mask.data());
174178
} else if constexpr (sizeof(T) == 2) {
175179
simd<T, N * 2> D;
176180
D = __esimd_wrregion<T, N * 2, N, /*VS*/ 0, N, 2>(D.data(), vals.data(), 0);
177-
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<2>()>(
178-
addrs.data(), D.data(), detail::ElemsPerAddrEncoding<2>(), mask.data());
181+
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<2>(),
182+
detail::ElemsPerAddrEncoding<2>()>(
183+
addrs.data(), D.data(), mask.data());
179184
} else
180-
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<1>()>(
181-
addrs.data(), vals.data(), detail::ElemsPerAddrEncoding<1>(),
182-
mask.data());
185+
__esimd_svm_scatter<T, N, detail::ElemsPerAddrEncoding<1>(),
186+
detail::ElemsPerAddrEncoding<1>()>(
187+
addrs.data(), vals.data(), mask.data());
183188
}
184189

185190
/// Loads a contiguous block of memory from given memory address and returns
@@ -769,6 +774,9 @@ enum fence_mask : uint8_t {
769774
/// esimd::fence sets the memory read/write order.
770775
/// @tparam cntl A bitmask composed from \c fence_mask bits.
771776
///
777+
template <uint8_t cntl> __ESIMD_API void fence() { __esimd_fence(cntl); }
778+
779+
__SYCL_DEPRECATED("use fence<fence_mask>()")
772780
__ESIMD_API void fence(fence_mask cntl) { __esimd_fence(cntl); }
773781

774782
/// Generic work-group barrier.
@@ -790,6 +798,7 @@ __ESIMD_API void barrier() {
790798
/// @{
791799

792800
/// Declare per-work-group slm size.
801+
/// @param size Shared Local Memory (SLM) size
793802
__ESIMD_API void slm_init(uint32_t size) { __esimd_slm_init(size); }
794803

795804
/// Gather operation over the Shared Local Memory.

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@ namespace __ESIMD_ENS {
2323
/// @addtogroup sycl_esimd_memory
2424
/// @{
2525

26-
/// Generic work-group split barrier
26+
/// Generic work-group split barrier.
27+
/// @tparam flag - split barrier action.
28+
template <split_barrier_action flag> __ESIMD_API void split_barrier() {
29+
__esimd_sbarrier(flag);
30+
}
31+
32+
__SYCL_DEPRECATED("use split_barrier<split_barrier_action>()")
2733
__ESIMD_API void split_barrier(split_barrier_action flag) {
2834
__esimd_sbarrier(flag);
2935
}

sycl/test/esimd/deprecated.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s
2+
3+
#include <sycl/ext/intel/esimd.hpp>
4+
5+
using namespace sycl::ext::intel::esimd;
6+
using namespace sycl::ext::intel::experimental::esimd;
7+
8+
void test_slm_init() SYCL_ESIMD_FUNCTION { slm_init(1024); }
9+
10+
void test_fence() SYCL_ESIMD_FUNCTION {
11+
fence<fence_mask::global_coherent_fence | fence_mask::local_barrier>();
12+
// expected-warning@+2 {{deprecated}}
13+
// expected-note@sycl/ext/intel/esimd/memory.hpp:* {{has been explicitly marked deprecated here}}
14+
fence(static_cast<fence_mask>(fence_mask::global_coherent_fence |
15+
fence_mask::local_barrier));
16+
}
17+
18+
void test_split_barrier() SYCL_ESIMD_FUNCTION {
19+
split_barrier<split_barrier_action::signal>();
20+
}

sycl/test/esimd/intrins_trans.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
6262
// CHECK: call void @llvm.genx.svm.block.st.i64.v32i32(i64 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
6363

6464
simd<uint32_t, VL> v01 =
65-
__esimd_svm_gather<uint32_t, VL>(v_addr.data(), 0, pred.data());
65+
__esimd_svm_gather<uint32_t, VL>(v_addr.data(), pred.data());
6666
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
6767

68-
__esimd_svm_scatter<uint32_t, VL>(v_addr.data(), v01.data(), 0, pred.data());
68+
__esimd_svm_scatter<uint32_t, VL>(v_addr.data(), v01.data(), pred.data());
6969
// CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
7070

7171
simd<short, 16> mina(0, 1);
@@ -228,12 +228,12 @@ test_mem_intrins(uint64_t addr, const vec<float, 8> &xf,
228228
// CHECK-LABEL: call void @llvm.genx.svm.block.st.i64.v8i32(i64 %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}})
229229
}
230230
{
231-
auto x = __esimd_svm_gather<unsigned char, 8>(get8ui64(), 0, get8ui16());
231+
auto x = __esimd_svm_gather<unsigned char, 8>(get8ui64(), get8ui16());
232232
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i8> @llvm.genx.svm.gather.v8i8.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> undef)
233233
use(x);
234234
}
235235
{
236-
__esimd_svm_scatter<unsigned char, 8>(get8ui64(), get8ui8(), 0, get8ui16());
236+
__esimd_svm_scatter<unsigned char, 8>(get8ui64(), get8ui8(), get8ui16());
237237
// CHECK-LABEL: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i8(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> %{{[a-zA-Z0-9.]+}})
238238
}
239239
{

sycl/test/esimd/slm_gather_scatter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ void kernel() __attribute__((sycl_device)) {
1616

1717
auto v0 = slm_gather<int, 32>(offsets);
1818

19-
auto fm =
19+
constexpr auto fm =
2020
fence_mask::global_coherent_fence | fence_mask::l3_flush_instructions |
2121
fence_mask::l3_flush_texture_data | fence_mask::l3_flush_constant_data |
2222
fence_mask::l3_flush_rw_data | fence_mask::local_barrier |
2323
fence_mask::l1_flush_ro_data | fence_mask::sw_barrier;
2424

25-
esimd::fence(static_cast<fence_mask>(fm));
25+
esimd::fence<fm>();
2626
esimd::barrier();
2727

2828
v0 = v0 + v1;

0 commit comments

Comments
 (0)