diff --git a/CMakeLists.txt b/CMakeLists.txt index 428402c1a..7e7699414 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,8 @@ endif() include(cmake/configure_variorum.cmake) set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) -set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available +set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available +set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available if(DEFINED ENV{VTUNE_HOME}) set(VTune_ROOT $ENV{VTUNE_HOME}) diff --git a/common/kp_config.hpp.in b/common/kp_config.hpp.in index 77c160870..09f2ad0d7 100644 --- a/common/kp_config.hpp.in +++ b/common/kp_config.hpp.in @@ -3,6 +3,7 @@ #define USE_MPI @KOKKOSTOOLS_HAS_MPI@ #cmakedefine KOKKOSTOOLS_HAS_NVTX +#cmakedefine KOKKOSTOOLS_HAS_ROCTX #cmakedefine KOKKOSTOOLS_HAS_CALIPER #cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP #cmakedefine KOKKOSTOOLS_HAS_VARIORUM diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index e7490dbcb..88b3b5ac4 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -48,3 +48,6 @@ if(KOKKOSTOOLS_HAS_NVTX) add_kp_test(nvtx_connector "nvtx-connector") add_kp_test(nvtx_focused_connector "nvtx-focused-connector") endif() +if(KOKKOSTOOLS_HAS_ROCTX) + add_kp_test(roctx_connector "roctx-connector") +endif() diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 67419b039..1c8691274 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -52,6 +52,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector) KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector) #endif +#ifdef KOKKOSTOOLS_HAS_ROCTX +KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector) +#endif #ifdef KOKKOSTOOLS_HAS_CALIPER namespace cali { extern Kokkos::Tools::Experimental::EventSet 
get_kokkos_event_set( @@ -93,6 +96,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) { #ifdef KOKKOSTOOLS_HAS_NVTX handlers["nvtx-connector"] = NVTXConnector::get_event_set(); handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_ROCTX + handlers["roctx-connector"] = ROCTXConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; diff --git a/profiling/roctx-connector/kp_roctx_connector.cpp b/profiling/roctx-connector/kp_roctx_connector.cpp index 593210aa3..6c1ea182e 100644 --- a/profiling/roctx-connector/kp_roctx_connector.cpp +++ b/profiling/roctx-connector/kp_roctx_connector.cpp @@ -21,6 +21,8 @@ #include #include +#include "kp_core.hpp" + namespace { struct Section { std::string label; @@ -29,20 +31,27 @@ struct Section { std::vector
kokkosp_sections; } // namespace -struct Kokkos_Tools_ToolSettings { - bool requires_global_fencing; - bool padding[255]; -}; +namespace KokkosTools { +namespace ROCTXConnector { + +// True when KOKKOS_TOOLS_GLOBALFENCES parsed to a non-zero integer at init. +static bool tool_globfences = false; -extern "C" void kokkosp_request_tool_settings( - const uint32_t, Kokkos_Tools_ToolSettings* settings) { - settings->requires_global_fencing = false; +// Copies the flag set by kokkosp_init_library into the caller's settings. +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { + settings->requires_global_fencing = tool_globfences; } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t /*devInfoCount*/, - void* /*deviceInfo*/) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t /*devInfoCount*/, + Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { + // Global fencing is opt-in: the flag stays false unless the variable is set. + const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (tool_global_fences) { + tool_globfences = (std::atoi(tool_global_fences) != 0); + } + std::cout << "-----------------------------------------------------------\n" << "KokkosP: ROC Tracer Connector (sequence is " << loadSeq << ", version: " << interfaceVer << ")\n" @@ -51,7 +60,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, roctxMark("Kokkos::Initialization Complete"); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { std::cout << R"( ----------------------------------------------------------- KokkosP: Finalization of ROC Tracer Connector. Complete. @@ -61,66 +70,110 @@ KokkosP: Finalization of ROC Tracer Connector. Complete. 
roctxMark("Kokkos::Finalization Complete"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_for(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_push_profile_region(char* name) { - roctxRangePush(name); -} +void kokkosp_push_profile_region(const char* name) { roctxRangePush(name); } -extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); } +void kokkosp_pop_profile_region() { roctxRangePop(); } -extern "C" void kokkosp_create_profile_section(const char* name, - uint32_t* sID) { +void kokkosp_create_profile_section(const char* name, uint32_t* sID) { *sID = kokkosp_sections.size(); kokkosp_sections.push_back( {std::string(name), static_cast(-1)}); } -extern "C" void kokkosp_start_profile_section(const uint32_t sID) { +void kokkosp_start_profile_section(const uint32_t sID) { auto& section = kokkosp_sections[sID]; section.id = 
roctxRangeStart(section.label.c_str()); } -extern "C" void kokkosp_stop_profile_section(const uint32_t sID) { +void kokkosp_stop_profile_section(const uint32_t sID) { auto const& section = kokkosp_sections[sID]; roctxRangeStop(section.id); } -extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) { +void kokkosp_destroy_profile_section(const uint32_t /*sID*/) { // do nothing } -extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/, - uint64_t* fID) { +// Forwards a named one-off event to roctxMark. +void kokkosp_profile_event(const char* name) { roctxMark(name); } + +void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/, + uint64_t* fID) { *fID = roctxRangeStart(name); } -extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); } +void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); } + +// Returns an EventSet populated with the callbacks defined in this namespace. +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.create_profile_section = kokkosp_create_profile_section; + my_event_set.start_profile_section = kokkosp_start_profile_section; + my_event_set.stop_profile_section = kokkosp_stop_profile_section; + my_event_set.destroy_profile_section = kokkosp_destroy_profile_section; + 
my_event_set.profile_event = kokkosp_profile_event; + my_event_set.begin_fence = kokkosp_begin_fence; + my_event_set.end_fence = kokkosp_end_fence; + return my_event_set; +} + +} // namespace ROCTXConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::ROCTXConnector; + +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section) +EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section) +EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section) +EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section) +EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event); +EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence); +EXPOSE_END_FENCE(impl::kokkosp_end_fence); +} // extern "C"