diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index ffa071f209580..259a54c131219 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,62 @@ void nd_launch(queue Q, launch_config, Properties> Config, }); } +// Free function kernel enqueue functions +template +void single_task(queue Q, [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + detail::submit_kernel_direct_single_task(std::move(Q), + [Args...]() { Func(Args...); }); +} + +template +void single_task(handler &CGH, + [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + CGH.single_task([Args...]() { Func(Args...); }); +} + +template +void nd_launch(queue Q, nd_range Range, + [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + detail::submit_kernel_direct_parallel_for( + std::move(Q), Range, [Args...](sycl::nd_item<>) { Func(Args...); }); +} + +template +void nd_launch(handler &CGH, nd_range Range, + [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + CGH.parallel_for(Range, [Args...](sycl::nd_item<>) { Func(Args...); }); +} + +template +void nd_launch(queue Q, launch_config, Properties> Config, + [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + + ext::oneapi::experimental::detail::LaunchConfigAccess, + Properties> + ConfigAccess(Config); + detail::submit_kernel_direct_parallel_for( + std::move(Q), ConfigAccess.getRange(), + [Args...](sycl::nd_item<>) { Func(Args...); }, {}, + ConfigAccess.getProperties()); +} + +template +void nd_launch(handler &CGH, + launch_config, Properties> Config, + [[maybe_unused]] kernel_function_s KernelFunc, + ArgsT &&...Args) { + ext::oneapi::experimental::detail::LaunchConfigAccess, + Properties> + ConfigAccess(Config); + CGH.parallel_for(ConfigAccess.getRange(), ConfigAccess.getProperties(), + [Args...](sycl::nd_item<>) { Func(Args...); }); +} + inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) { CGH.memcpy(Dest, Src, NumBytes); } diff --git a/sycl/include/sycl/ext/oneapi/experimental/free_function_traits.hpp b/sycl/include/sycl/ext/oneapi/experimental/free_function_traits.hpp index 2b5d1f4190d21..e0b15593566f3 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/free_function_traits.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/free_function_traits.hpp @@ -44,6 +44,10 @@ template struct is_kernel { template inline constexpr bool is_kernel_v = is_kernel::value; +template struct kernel_function_s {}; + +template inline constexpr kernel_function_s kernel_function; + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/FreeFunctionKernels/free_function_kernels_enqueue.cpp b/sycl/test-e2e/FreeFunctionKernels/free_function_kernels_enqueue.cpp new file mode 100644 index 0000000000000..19297a918c6cc --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/free_function_kernels_enqueue.cpp @@ -0,0 +1,187 @@ +// REQUIRES: aspect-usm_shared_allocations + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// XFAIL: target-native_cpu +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142 + +// This test checks that free function kernels can be submitted using the +// enqueued functions defined in the free function kernel extension, namely the +// single_task and the nd_launch functions that take a queue/handler as an +// argument. These were added in https://github.com/intel/llvm/pull/19995. + +#include +#include +#include +#include +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +using accType = + sycl::accessor; + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void empty() {} + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void initialize(int *ptr) { + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); + ptr[Lid] = Lid; +} + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void successor(int *src, int *dst) { *dst = *src + 1; } + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void square(int *src, int *dst) { + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); + dst[Lid] = src[Lid] * src[Lid]; +} + +template +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void squareWithScratchMemoryTemplated(T *src, T *dst) { + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); + T *LocalMem = reinterpret_cast(syclexp::get_work_group_scratch_memory()); + LocalMem[Lid] = src[Lid] * src[Lid]; + dst[Lid] = LocalMem[Lid]; +} + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void squareWithAccessor(accType src, accType dst) { + size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id(); + dst[Lid] = src[Lid] * src[Lid]; +} + +constexpr int SIZE = 16; + +int main() { + sycl::queue Q; + int *Src = sycl::malloc_shared(SIZE, Q); + int *Dst = sycl::malloc_shared(SIZE, Q); + + syclexp::single_task(Q, syclexp::kernel_function_s{}); + + syclexp::nd_launch( + Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), + syclexp::kernel_function, Src); + Q.wait(); + + syclexp::launch_config Config{ + ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), + syclexp::properties{ + syclexp::work_group_scratch_size(SIZE * sizeof(int))}}; + + static_assert( + std::is_same_v< + decltype(syclexp::nd_launch( + Q, Config, + syclexp::kernel_function>, + Src, Dst)), + void>); + + syclexp::nd_launch( + Q, Config, + syclexp::kernel_function>, Src, + Dst); + Q.wait(); + + for (int I = 0; I < SIZE; I++) { + assert(Dst[I] == Src[I] * Src[I]); + } + + syclexp::nd_launch( + Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), + syclexp::kernel_function, Src, Dst); + Q.wait(); + + for (int I = 0; I < SIZE; I++) { + assert(Dst[I] == Src[I] * Src[I]); + } + + static_assert( + std::is_same_v, Src, Dst)), + void>); + syclexp::single_task(Q, syclexp::kernel_function, Src, Dst); + Q.wait(); + assert(Dst[0] == Src[0] + 1); + + int SrcData[SIZE]; + int DstData[SIZE]; + for (int I = 0; I < SIZE; ++I) { + SrcData[I] = I; + } + + { // Test with accessors + sycl::buffer SrcBuf{&SrcData[0], SIZE}; + sycl::buffer DstBuf{&DstData[0], SIZE}; + accType SrcAcc{SrcBuf}; + accType DstAcc{DstBuf}; + + Q.submit([&](sycl::handler &CGH) { + CGH.require(SrcAcc); + CGH.require(DstAcc); + syclexp::nd_launch(CGH, Config, + syclexp::kernel_function, SrcAcc, + DstAcc); + }); + } + for (int I = 0; I < SIZE; ++I) { + assert(DstData[I] == SrcData[I] * SrcData[I]); + } + + Q.submit([&](sycl::handler &CGH) { + static_assert( + std::is_same_v>, + Src, Dst)), + void>); + syclexp::nd_launch( + CGH, Config, + syclexp::kernel_function>, Src, + Dst); + }).wait(); + + for (int I = 0; I < SIZE; I++) { + assert(Dst[I] == Src[I] * Src[I]); + } + + Q.submit([&](sycl::handler &CGH) { + static_assert( + std::is_same_v(::sycl::range<1>(SIZE), + ::sycl::range<1>(SIZE)), + syclexp::kernel_function, Src, Dst)), + void>); + + syclexp::nd_launch( + CGH, + ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)), + syclexp::kernel_function, Src, Dst); + }).wait(); + + for (int I = 0; I < SIZE; I++) { + assert(Dst[I] == Src[I] * Src[I]); + } + + Q.submit([&](sycl::handler &CGH) { + static_assert(std::is_same_v, + Src, Dst)), + void>); + syclexp::single_task(CGH, syclexp::kernel_function, Src, Dst); + }).wait(); + + assert(Dst[0] == Src[0] + 1); + + return 0; +}