Skip to content

Commit

Permalink
Add initialization workaround
Browse files Browse the repository at this point in the history
  • Loading branch information
G-071 committed Feb 13, 2024
1 parent dd5cb88 commit 451cfc2
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
38 changes: 38 additions & 0 deletions src/monopole_interactions/monopole_kernel_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,44 @@

#include "octotiger/options.hpp"

#if defined(OCTOTIGER_HAVE_KOKKOS) && defined(KOKKOS_ENABLE_SYCL)
#include <CL/sycl.hpp>
// We encounter segfaults on Intel GPUs when running the normal kernels for the first time after
// the program starts. This seems to be some initialization issue as we can simply fix it by
// (non-concurrently) run simple dummy kernel first right after starting octotiger
// (presumably initializes something within the intel gpu runtime).
// Curiousely we have to do this not once per program, but once per lib (octolib and hydrolib).
//
// Somewhat of an ugly workaround but it does the trick and allows us to target Intel GPUs as
// Octo-Tiger runs as expected after applying this workaround.

// TODO(daissgr) Check again in the future to see if the runtime has matured and we don't need this anymore.
// (last check was 02/2024)

/// Utility function working around segfault on Intel GPU. Initializes something within the runtime by runnning
///a dummy kernel
int touch_sycl_device_by_running_a_dummy_kernel(void) {
try {
cl::sycl::queue q(cl::sycl::default_selector_v, cl::sycl::property::queue::in_order{});
cl::sycl::event my_kernel_event = q.submit(
[&](cl::sycl::handler& h) {
h.parallel_for(512, [=](auto i) {});
},
cl::sycl::detail::code_location{});
my_kernel_event.wait();
} catch (sycl::exception const& e) {
std::cerr << "(NON-FATAL) ERROR: Caught sycl::exception during SYCL dummy kernel!\n";
std::cerr << " {what}: " << e.what() << "\n ";
std::cerr << "Continuing for now as error only occured in the dummy kernel...\n";
return 2;

}
return 1;
}
/// Dummy variable to ensure the touch_sycl_device_by_running_a_dummy_kernel is being run
const int init_sycl_device = touch_sycl_device_by_running_a_dummy_kernel();
#endif

#if defined(OCTOTIGER_HAVE_KOKKOS)
#if defined(KOKKOS_ENABLE_CUDA)
using device_executor = hpx::kokkos::cuda_executor;
Expand Down
37 changes: 37 additions & 0 deletions src/unitiger/hydro_impl/hydro_kernel_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,43 @@
#ifdef OCTOTIGER_HAVE_KOKKOS
#include "octotiger/unitiger/hydro_impl/hydro_kokkos_kernel.hpp"
#endif
#if defined(OCTOTIGER_HAVE_KOKKOS) && defined(KOKKOS_ENABLE_SYCL)
#include <CL/sycl.hpp>
// We encounter segfaults on Intel GPUs when running the normal kernels for the first time after
// the program starts. This seems to be some initialization issue as we can simply fix it by
// (non-concurrently) run simple dummy kernel first right after starting octotiger
// (presumably initializes something within the intel gpu runtime).
// Curiousely we have to do this not once per program, but once per lib (octolib and hydrolib).
//
// Somewhat of an ugly workaround but it does the trick and allows us to target Intel GPUs as
// Octo-Tiger runs as expected after applying this workaround.

// TODO(daissgr) Check again in the future to see if the runtime has matured and we don't need this anymore.
// (last check was 02/2024)

/// Utility function working around segfault on Intel GPU. Initializes something within the runtime by runnning
///a dummy kernel
int touch_sycl_device_by_running_a_dummy_kernel(void) {
try {
cl::sycl::queue q(cl::sycl::default_selector_v, cl::sycl::property::queue::in_order{});
cl::sycl::event my_kernel_event = q.submit(
[&](cl::sycl::handler& h) {
h.parallel_for(512, [=](auto i) {});
},
cl::sycl::detail::code_location{});
my_kernel_event.wait();
} catch (sycl::exception const& e) {
std::cerr << "(NON-FATAL) ERROR: Caught sycl::exception during SYCL dummy kernel!\n";
std::cerr << " {what}: " << e.what() << "\n ";
std::cerr << "Continuing for now as error only occured in the dummy kernel...\n";
return 2;

}
return 1;
}
/// Dummy variable to ensure the touch_sycl_device_by_running_a_dummy_kernel is being run
const int init_sycl_device = touch_sycl_device_by_running_a_dummy_kernel();
#endif
#if defined(OCTOTIGER_HAVE_KOKKOS)
hpx::once_flag init_hydro_kokkos_pool_flag;
#if defined(KOKKOS_ENABLE_CUDA)
Expand Down

0 comments on commit 451cfc2

Please sign in to comment.