From 451cfc216241da56be10d3cadbcab96ed5d44619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20Dai=C3=9F?= Date: Tue, 13 Feb 2024 15:41:48 +0100 Subject: [PATCH] Add initialization workaround --- .../monopole_kernel_interface.cpp | 38 +++++++++++++++++++ .../hydro_impl/hydro_kernel_interface.cpp | 37 ++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/src/monopole_interactions/monopole_kernel_interface.cpp b/src/monopole_interactions/monopole_kernel_interface.cpp index 221e7649..e358a1c5 100644 --- a/src/monopole_interactions/monopole_kernel_interface.cpp +++ b/src/monopole_interactions/monopole_kernel_interface.cpp @@ -29,6 +29,44 @@ #include "octotiger/options.hpp" +#if defined(OCTOTIGER_HAVE_KOKKOS) && defined(KOKKOS_ENABLE_SYCL) +#include +// We encounter segfaults on Intel GPUs when running the normal kernels for the first time after +// the program starts. This seems to be some initialization issue as we can simply fix it by +// (non-concurrently) run simple dummy kernel first right after starting octotiger +// (presumably initializes something within the intel gpu runtime). +// Curiousely we have to do this not once per program, but once per lib (octolib and hydrolib). +// +// Somewhat of an ugly workaround but it does the trick and allows us to target Intel GPUs as +// Octo-Tiger runs as expected after applying this workaround. + +// TODO(daissgr) Check again in the future to see if the runtime has matured and we don't need this anymore. +// (last check was 02/2024) + +/// Utility function working around segfault on Intel GPU. Initializes something within the runtime by runnning +///a dummy kernel +int touch_sycl_device_by_running_a_dummy_kernel(void) { + try { + cl::sycl::queue q(cl::sycl::default_selector_v, cl::sycl::property::queue::in_order{}); + cl::sycl::event my_kernel_event = q.submit( + [&](cl::sycl::handler& h) { + h.parallel_for(512, [=](auto i) {}); + }, + cl::sycl::detail::code_location{}); + my_kernel_event.wait(); + } catch (sycl::exception const& e) { + std::cerr << "(NON-FATAL) ERROR: Caught sycl::exception during SYCL dummy kernel!\n"; + std::cerr << " {what}: " << e.what() << "\n "; + std::cerr << "Continuing for now as error only occured in the dummy kernel...\n"; + return 2; + + } + return 1; +} +/// Dummy variable to ensure the touch_sycl_device_by_running_a_dummy_kernel is being run +const int init_sycl_device = touch_sycl_device_by_running_a_dummy_kernel(); +#endif + #if defined(OCTOTIGER_HAVE_KOKKOS) #if defined(KOKKOS_ENABLE_CUDA) using device_executor = hpx::kokkos::cuda_executor; diff --git a/src/unitiger/hydro_impl/hydro_kernel_interface.cpp b/src/unitiger/hydro_impl/hydro_kernel_interface.cpp index 53e0ff4e..70c87361 100644 --- a/src/unitiger/hydro_impl/hydro_kernel_interface.cpp +++ b/src/unitiger/hydro_impl/hydro_kernel_interface.cpp @@ -15,6 +15,43 @@ #ifdef OCTOTIGER_HAVE_KOKKOS #include "octotiger/unitiger/hydro_impl/hydro_kokkos_kernel.hpp" #endif +#if defined(OCTOTIGER_HAVE_KOKKOS) && defined(KOKKOS_ENABLE_SYCL) +#include +// We encounter segfaults on Intel GPUs when running the normal kernels for the first time after +// the program starts. This seems to be some initialization issue as we can simply fix it by +// (non-concurrently) run simple dummy kernel first right after starting octotiger +// (presumably initializes something within the intel gpu runtime). +// Curiousely we have to do this not once per program, but once per lib (octolib and hydrolib). +// +// Somewhat of an ugly workaround but it does the trick and allows us to target Intel GPUs as +// Octo-Tiger runs as expected after applying this workaround. + +// TODO(daissgr) Check again in the future to see if the runtime has matured and we don't need this anymore. +// (last check was 02/2024) + +/// Utility function working around segfault on Intel GPU. Initializes something within the runtime by runnning +///a dummy kernel +int touch_sycl_device_by_running_a_dummy_kernel(void) { + try { + cl::sycl::queue q(cl::sycl::default_selector_v, cl::sycl::property::queue::in_order{}); + cl::sycl::event my_kernel_event = q.submit( + [&](cl::sycl::handler& h) { + h.parallel_for(512, [=](auto i) {}); + }, + cl::sycl::detail::code_location{}); + my_kernel_event.wait(); + } catch (sycl::exception const& e) { + std::cerr << "(NON-FATAL) ERROR: Caught sycl::exception during SYCL dummy kernel!\n"; + std::cerr << " {what}: " << e.what() << "\n "; + std::cerr << "Continuing for now as error only occured in the dummy kernel...\n"; + return 2; + + } + return 1; +} +/// Dummy variable to ensure the touch_sycl_device_by_running_a_dummy_kernel is being run +const int init_sycl_device = touch_sycl_device_by_running_a_dummy_kernel(); +#endif #if defined(OCTOTIGER_HAVE_KOKKOS) hpx::once_flag init_hydro_kokkos_pool_flag; #if defined(KOKKOS_ENABLE_CUDA)