Skip to content

Commit d1478de

Browse files
authored
[SYCL] Address Issue about Unable to Specify Kernel Properties when Reductions are Present (#20491)
Basically to fix this [issue](#16320). Note: seems that other than certain variants (overloads) of `nd_launch` which is explicitly mentioned in the issue, some variants of `parallel_for` with reductions are also having the same problem. Furthermore, these problematic `nd_launch` variants actually calls the problematic `paralle_for` variants, so in the end only `parallel_for` variants are modified. Also there're some test revisions in this PR. These changes are previously blocked by the aforementioned issue. And as of the revision itself, the key idea here is to get rid of all `parallel_for`/`single_task` overloads that take a property list as a parameter (since these overloads are to be deprecated). --------- Signed-off-by: Hu, Peisen <[email protected]>
1 parent bb941bd commit d1478de

File tree

7 files changed

+1108
-591
lines changed

7 files changed

+1108
-591
lines changed

sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,24 @@ struct ConflictingProperties<max_linear_work_group_size_key, Properties> {
444444
}();
445445
};
446446

447+
// If the kernel (last element in the parameter pack) has a get(properties_tag)
448+
// method, return the property list specified by this getter. Otherwise, return
449+
// an empty properety list.
450+
template <typename... RestT>
451+
auto RetrieveGetMethodPropertiesOrEmpty(RestT &&...Rest) {
452+
// Note: the following trivial identity lambda is used to avoid the issue
453+
// that line "const auto &KernelObj = (Rest, ...);" may result in a "left
454+
// operand of comma operator has no effect" error for certain compiler(s)
455+
auto Identity = [](const auto &x) -> decltype(auto) { return x; };
456+
const auto &KernelObj = (Identity(Rest), ...);
457+
if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
458+
decltype(KernelObj)>::value) {
459+
return KernelObj.get(ext::oneapi::experimental::properties_tag{});
460+
} else {
461+
return ext::oneapi::experimental::empty_properties_t{};
462+
}
463+
}
464+
447465
} // namespace detail
448466
} // namespace ext::oneapi::experimental
449467
} // namespace _V1

sycl/include/sycl/handler.hpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,27 +1789,33 @@ class __SYCL_EXPORT handler {
17891789
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
17901790
(sizeof...(RestT) > 1)>
17911791
parallel_for(range<1> Range, RestT &&...Rest) {
1792-
parallel_for<KernelName>(Range,
1793-
ext::oneapi::experimental::empty_properties_t{},
1794-
std::forward<RestT>(Rest)...);
1792+
parallel_for<KernelName>(
1793+
Range,
1794+
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
1795+
Rest...),
1796+
std::forward<RestT>(Rest)...);
17951797
}
17961798

17971799
template <typename KernelName = detail::auto_name, typename... RestT>
17981800
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
17991801
(sizeof...(RestT) > 1)>
18001802
parallel_for(range<2> Range, RestT &&...Rest) {
1801-
parallel_for<KernelName>(Range,
1802-
ext::oneapi::experimental::empty_properties_t{},
1803-
std::forward<RestT>(Rest)...);
1803+
parallel_for<KernelName>(
1804+
Range,
1805+
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
1806+
Rest...),
1807+
std::forward<RestT>(Rest)...);
18041808
}
18051809

18061810
template <typename KernelName = detail::auto_name, typename... RestT>
18071811
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
18081812
(sizeof...(RestT) > 1)>
18091813
parallel_for(range<3> Range, RestT &&...Rest) {
1810-
parallel_for<KernelName>(Range,
1811-
ext::oneapi::experimental::empty_properties_t{},
1812-
std::forward<RestT>(Rest)...);
1814+
parallel_for<KernelName>(
1815+
Range,
1816+
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
1817+
Rest...),
1818+
std::forward<RestT>(Rest)...);
18131819
}
18141820

18151821
template <typename KernelName = detail::auto_name, int Dims,
@@ -1834,7 +1840,20 @@ class __SYCL_EXPORT handler {
18341840

18351841
template <typename KernelName = detail::auto_name, int Dims,
18361842
typename... RestT>
1837-
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value>
1843+
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
1844+
(sizeof...(RestT) > 1)> // variant with reductions
1845+
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
1846+
parallel_for<KernelName>(
1847+
Range,
1848+
ext::oneapi::experimental::detail::RetrieveGetMethodPropertiesOrEmpty(
1849+
Rest...),
1850+
std::forward<RestT>(Rest)...);
1851+
}
1852+
1853+
template <typename KernelName = detail::auto_name, int Dims,
1854+
typename... RestT>
1855+
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
1856+
(sizeof...(RestT) == 1)> // variant without reductions
18381857
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
18391858
parallel_for<KernelName>(Range,
18401859
ext::oneapi::experimental::empty_properties_t{},

sycl/test-e2e/Properties/cache_config.cpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
// REQUIRES: gpu, level_zero
22

3-
// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
4-
// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is
5-
// resolved.
6-
// RUN: %{build} -o %t.out -Wno-deprecated-declarations
3+
// RUN: %{build} -o %t.out
74
// RUN: env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s
85

96
#include <numeric>
@@ -16,34 +13,39 @@ using namespace sycl::ext::intel::experimental;
1613
using namespace sycl::ext::oneapi::experimental;
1714

1815
struct KernelFunctor {
19-
20-
KernelFunctor() {}
21-
2216
void operator()() const {}
2317
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
2418
};
2519

2620
struct KernelFunctorND {
27-
28-
KernelFunctorND() {}
29-
3021
void operator()(nd_item<2> i) const {}
3122
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
3223
};
3324

3425
struct NegativeKernelFunctor {
35-
36-
NegativeKernelFunctor() {}
37-
3826
void operator()(nd_item<2> i) const {}
3927
auto get(properties_tag) const { return properties{}; }
4028
};
4129

4230
struct RangeKernelFunctor {
31+
void operator()(id<2> i) const {}
32+
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
33+
};
34+
35+
struct WorkGroupFunctor {
36+
void operator()(group<1> g) const {
37+
g.parallel_for_work_item([&](h_item<1>) {});
38+
}
39+
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
40+
};
4341

44-
RangeKernelFunctor() {}
42+
template <typename T1> struct ReductionKernelFunctor {
43+
T1 mInput_values;
44+
ReductionKernelFunctor(T1 &Input_values) : mInput_values(Input_values) {}
4545

46-
void operator()(id<2> i) const {}
46+
template <typename sumT> void operator()(id<1> idx, sumT &sum) const {
47+
sum += mInput_values[idx];
48+
}
4749
auto get(properties_tag) const { return properties{cache_config(large_slm)}; }
4850
};
4951

@@ -63,18 +65,16 @@ int main() {
6365
// CHECK: zeKernelSetCacheConfig
6466
std::cout << "parallel_for_work_group(range, func)" << std::endl;
6567
q.submit([&](handler &cgh) {
66-
cgh.parallel_for_work_group<class hpar_range>(
67-
range<1>(8), properties,
68-
[=](group<1> g) { g.parallel_for_work_item([&](h_item<1> i) {}); });
68+
cgh.parallel_for_work_group<class hpar_range>(range<1>(8),
69+
WorkGroupFunctor{});
6970
});
7071

7172
// CHECK: parallel_for_work_group(range, range, func)
7273
// CHECK: zeKernelSetCacheConfig
7374
std::cout << "parallel_for_work_group(range, range, func)" << std::endl;
7475
q.submit([&](handler &cgh) {
7576
cgh.parallel_for_work_group<class hpar_range_range>(
76-
range<1>(8), range<1>(4), properties,
77-
[=](group<1> g) { g.parallel_for_work_item([&](h_item<1> i) {}); });
77+
range<1>(8), range<1>(4), WorkGroupFunctor{});
7878
});
7979

8080
buffer<int> values_buf{1024};
@@ -92,8 +92,8 @@ int main() {
9292
q.submit([&](handler &cgh) {
9393
auto input_values = values_buf.get_access<access_mode::read>(cgh);
9494
auto sum_reduction = reduction(sum_buf, cgh, plus<>());
95-
cgh.parallel_for(range<1>{1024}, properties, sum_reduction,
96-
[=](id<1> idx, auto &sum) { sum += input_values[idx]; });
95+
cgh.parallel_for(range<1>{1024}, sum_reduction,
96+
ReductionKernelFunctor(input_values));
9797
});
9898

9999
// CHECK: KernelFunctor single_task
@@ -111,7 +111,7 @@ int main() {
111111
// CHECK-NOT: zeKernelSetCacheConfig
112112
std::cout << "negative parallel_for with sycl::nd_range" << std::endl;
113113
q.parallel_for(nd_range<2>{range<2>(4, 4), range<2>(2, 2)},
114-
[=](nd_item<2> i) {})
114+
NegativeKernelFunctor{})
115115
.wait();
116116

117117
// CHECK: negative parallel_for with KernelFunctor

0 commit comments

Comments
 (0)