Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <sycl/detail/common.hpp>
#include <sycl/event.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_types.hpp>
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/handler.hpp>
Expand Down Expand Up @@ -357,6 +358,62 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
});
}

// Free function kernel enqueue functions
template <auto *Func, typename... ArgsT>
void single_task(queue Q, [[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
detail::submit_kernel_direct_single_task(std::move(Q),
[Args...]() { Func(Args...); });
}

template <auto *Func, typename... ArgsT>
void single_task(handler &CGH,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
CGH.single_task([Args...]() { Func(Args...); });
}

template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(queue Q, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
detail::submit_kernel_direct_parallel_for(
std::move(Q), Range, [Args...](sycl::nd_item<>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
CGH.parallel_for(Range, [Args...](sycl::nd_item<>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {

ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
ConfigAccess(Config);
detail::submit_kernel_direct_parallel_for(
std::move(Q), ConfigAccess.getRange(),
[Args...](sycl::nd_item<>) { Func(Args...); }, {},
ConfigAccess.getProperties());
}

template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(handler &CGH,
launch_config<nd_range<Dimensions>, Properties> Config,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
ConfigAccess(Config);
CGH.parallel_for(ConfigAccess.getRange(), ConfigAccess.getProperties(),
[Args...](sycl::nd_item<>) { Func(Args...); });
}

inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
CGH.memcpy(Dest, Src, NumBytes);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ template <auto *Func> struct is_kernel {
template <auto *Func>
inline constexpr bool is_kernel_v = is_kernel<Func>::value;

template <auto *Func> struct kernel_function_s {};

template <auto *Func> inline constexpr kernel_function_s<Func> kernel_function;

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
187 changes: 187 additions & 0 deletions sycl/test-e2e/FreeFunctionKernels/free_function_kernels_enqueue.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
// REQUIRES: aspect-usm_shared_allocations

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// XFAIL: target-native_cpu
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142

// This test checks that free function kernels can be submitted using the
// enqueued functions defined in the free function kernel extension, namely the
// single_task and the nd_launch functions that take a queue/handler as an
// argument. These were added in https://github.com/intel/llvm/pull/19995.

#include <cassert>
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>
#include <sycl/ext/oneapi/work_group_static.hpp>
#include <sycl/usm.hpp>

namespace syclext = sycl::ext::oneapi;
namespace syclexp = sycl::ext::oneapi::experimental;

using accType =
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device,
sycl::access::placeholder::true_t>;

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
void empty() {}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void initialize(int *ptr) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
ptr[Lid] = Lid;
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
void successor(int *src, int *dst) { *dst = *src + 1; }

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void square(int *src, int *dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
dst[Lid] = src[Lid] * src[Lid];
}

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void squareWithScratchMemoryTemplated(T *src, T *dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
T *LocalMem = reinterpret_cast<T *>(syclexp::get_work_group_scratch_memory());
LocalMem[Lid] = src[Lid] * src[Lid];
dst[Lid] = LocalMem[Lid];
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void squareWithAccessor(accType src, accType dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
dst[Lid] = src[Lid] * src[Lid];
}

constexpr int SIZE = 16;

int main() {
sycl::queue Q;
int *Src = sycl::malloc_shared<int>(SIZE, Q);
int *Dst = sycl::malloc_shared<int>(SIZE, Q);

syclexp::single_task(Q, syclexp::kernel_function_s<empty>{});

syclexp::nd_launch(
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<initialize>, Src);
Q.wait();

syclexp::launch_config Config{
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::properties{
syclexp::work_group_scratch_size(SIZE * sizeof(int))}};

static_assert(
std::is_same_v<
decltype(syclexp::nd_launch(
Q, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>,
Src, Dst)),
void>);

syclexp::nd_launch(
Q, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
Dst);
Q.wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

syclexp::nd_launch(
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst);
Q.wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

static_assert(
std::is_same_v<decltype(syclexp::single_task(
Q, syclexp::kernel_function<successor>, Src, Dst)),
void>);
syclexp::single_task(Q, syclexp::kernel_function<successor>, Src, Dst);
Q.wait();
assert(Dst[0] == Src[0] + 1);

int SrcData[SIZE];
int DstData[SIZE];
for (int I = 0; I < SIZE; ++I) {
SrcData[I] = I;
}

{ // Test with accessors
sycl::buffer<int> SrcBuf{&SrcData[0], SIZE};
sycl::buffer<int> DstBuf{&DstData[0], SIZE};
accType SrcAcc{SrcBuf};
accType DstAcc{DstBuf};

Q.submit([&](sycl::handler &CGH) {
CGH.require(SrcAcc);
CGH.require(DstAcc);
syclexp::nd_launch(CGH, Config,
syclexp::kernel_function<squareWithAccessor>, SrcAcc,
DstAcc);
});
}
for (int I = 0; I < SIZE; ++I) {
assert(DstData[I] == SrcData[I] * SrcData[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(
std::is_same_v<decltype(syclexp::nd_launch(
CGH, Config,
syclexp::kernel_function<
squareWithScratchMemoryTemplated<int>>,
Src, Dst)),
void>);
syclexp::nd_launch(
CGH, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
Dst);
}).wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(
std::is_same_v<decltype(syclexp::nd_launch(
CGH,
::sycl::nd_range<1>(::sycl::range<1>(SIZE),
::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst)),
void>);

syclexp::nd_launch(
CGH,
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst);
}).wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(std::is_same_v<decltype(syclexp::single_task(
CGH, syclexp::kernel_function<successor>,
Src, Dst)),
void>);
syclexp::single_task(CGH, syclexp::kernel_function<successor>, Src, Dst);
}).wait();

assert(Dst[0] == Src[0] + 1);

return 0;
}
Loading