Skip to content

Commit

Permalink
Use eventset in roctxconnector (as in nvtxconnector) (#219)
Browse files Browse the repository at this point in the history
* Use eventset for roctxconnector

* Improvements thanks to review.
  • Loading branch information
maartenarnst authored Nov 20, 2023
1 parent 2ddedef commit 62f3f02
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 43 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ endif()
include(cmake/configure_variorum.cmake)

set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER})
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available

if(DEFINED ENV{VTUNE_HOME})
set(VTune_ROOT $ENV{VTUNE_HOME})
Expand Down
1 change: 1 addition & 0 deletions common/kp_config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define USE_MPI @KOKKOSTOOLS_HAS_MPI@

#cmakedefine KOKKOSTOOLS_HAS_NVTX
#cmakedefine KOKKOSTOOLS_HAS_ROCTX
#cmakedefine KOKKOSTOOLS_HAS_CALIPER
#cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP
#cmakedefine KOKKOSTOOLS_HAS_VARIORUM
Expand Down
3 changes: 3 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ if(KOKKOSTOOLS_HAS_NVTX)
add_kp_test(nvtx_connector "nvtx-connector")
add_kp_test(nvtx_focused_connector "nvtx-focused-connector")
endif()
if(KOKKOSTOOLS_HAS_ROCTX)
add_kp_test(roctx_connector "roctx-connector")
endif()
6 changes: 6 additions & 0 deletions profiling/all/kp_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_CALIPER
namespace cali {
extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set(
Expand Down Expand Up @@ -93,6 +96,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) {
#ifdef KOKKOSTOOLS_HAS_NVTX
handlers["nvtx-connector"] = NVTXConnector::get_event_set();
handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set();
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
handlers["roctx-connector"] = ROCTXConnector::get_event_set();
#endif
auto e = handlers.find(profiler);
if (e != handlers.end()) return e->second;
Expand Down
136 changes: 94 additions & 42 deletions profiling/roctx-connector/kp_roctx_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <string>
#include <vector>

#include "kp_core.hpp"

namespace {
struct Section {
std::string label;
Expand All @@ -29,20 +31,28 @@ struct Section {
std::vector<Section> kokkosp_sections;
} // namespace

struct Kokkos_Tools_ToolSettings {
bool requires_global_fencing;
bool padding[255];
};
namespace KokkosTools {
namespace ROCTXConnector {

static bool tool_globfences;

extern "C" void kokkosp_request_tool_settings(
const uint32_t, Kokkos_Tools_ToolSettings* settings) {
settings->requires_global_fencing = false;
// Tool-settings callback: copies the connector's `tool_globfences` flag into
// `settings->requires_global_fencing`. The first (unnamed) argument is ignored.
void kokkosp_request_tool_settings(const uint32_t,
                                   Kokkos_Tools_ToolSettings* settings) {
  settings->requires_global_fencing = tool_globfences;
}

extern "C" void kokkosp_init_library(const int loadSeq,
const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
void* /*deviceInfo*/) {
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES");
if (tool_global_fences) {
tool_globfences = (atoi(tool_global_fences) != 0);
}

std::cout << "-----------------------------------------------------------\n"
<< "KokkosP: ROC Tracer Connector (sequence is " << loadSeq
<< ", version: " << interfaceVer << ")\n"
Expand All @@ -51,7 +61,7 @@ extern "C" void kokkosp_init_library(const int loadSeq,
roctxMark("Kokkos::Initialization Complete");
}

extern "C" void kokkosp_finalize_library() {
void kokkosp_finalize_library() {
std::cout << R"(
-----------------------------------------------------------
KokkosP: Finalization of ROC Tracer Connector. Complete.
Expand All @@ -61,66 +71,108 @@ KokkosP: Finalization of ROC Tracer Connector. Complete.
roctxMark("Kokkos::Finalization Complete");
}

extern "C" void kokkosp_begin_parallel_for(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
// Callback for the start of a parallel_for: pushes a roctx range labelled
// with `name`. The device ID and kernel ID arguments are not used.
void kokkosp_begin_parallel_for(const char* name,
                                const uint32_t /*devID*/,
                                uint64_t* /*kID*/) {
  roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
roctxRangePop();
}
// Callback for the end of a parallel_for: calls roctxRangePop(), presumably
// closing the range pushed by kokkosp_begin_parallel_for. The kernel ID is
// not used.
void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
  roctxRangePop();
}

extern "C" void kokkosp_begin_parallel_scan(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
// Callback for the start of a parallel_scan: pushes a roctx range labelled
// with `name`. The device ID and kernel ID arguments are not used.
void kokkosp_begin_parallel_scan(const char* name,
                                 const uint32_t /*devID*/,
                                 uint64_t* /*kID*/) {
  roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
roctxRangePop();
}
// Callback for the end of a parallel_scan: calls roctxRangePop(), presumably
// closing the range pushed by kokkosp_begin_parallel_scan. The kernel ID is
// not used.
void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
  roctxRangePop();
}

extern "C" void kokkosp_begin_parallel_reduce(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
// Callback for the start of a parallel_reduce: pushes a roctx range labelled
// with `name`. The device ID and kernel ID arguments are not used.
void kokkosp_begin_parallel_reduce(const char* name,
                                   const uint32_t /*devID*/,
                                   uint64_t* /*kID*/) {
  roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
roctxRangePop();
}
// Callback for the end of a parallel_reduce: calls roctxRangePop(),
// presumably closing the range pushed by kokkosp_begin_parallel_reduce. The
// kernel ID is not used.
void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
  roctxRangePop();
}

extern "C" void kokkosp_push_profile_region(char* name) {
roctxRangePush(name);
}
// Region-push callback: pushes a roctx range labelled with `name`.
void kokkosp_push_profile_region(const char* name) {
  roctxRangePush(name);
}

extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); }
// Region-pop callback: calls roctxRangePop(), presumably closing the range
// opened by the matching kokkosp_push_profile_region call.
void kokkosp_pop_profile_region() {
  roctxRangePop();
}

extern "C" void kokkosp_create_profile_section(const char* name,
uint32_t* sID) {
// Registers a new profile section labelled `name` and writes its ID to
// `*sID`. The ID is the section's index in `kokkosp_sections`; the stored
// range id starts out as static_cast<roctx_range_id_t>(-1) until the section
// is started.
void kokkosp_create_profile_section(const char* name, uint32_t* sID) {
  // Index the new entry will occupy once appended.
  const auto new_index = kokkosp_sections.size();
  *sID = new_index;

  kokkosp_sections.push_back(
      {std::string(name), static_cast<roctx_range_id_t>(-1)});
}

extern "C" void kokkosp_start_profile_section(const uint32_t sID) {
// Starts the profile section identified by `sID`: begins a roctx range
// labelled with the section's stored label and records the returned range id
// in the section. `sID` is used as an index into `kokkosp_sections` without
// bounds checking.
void kokkosp_start_profile_section(const uint32_t sID) {
  kokkosp_sections[sID].id =
      roctxRangeStart(kokkosp_sections[sID].label.c_str());
}

extern "C" void kokkosp_stop_profile_section(const uint32_t sID) {
// Stops the profile section identified by `sID` by passing its stored range
// id to roctxRangeStop. `sID` is used as an index into `kokkosp_sections`
// without bounds checking.
void kokkosp_stop_profile_section(const uint32_t sID) {
  roctxRangeStop(kokkosp_sections[sID].id);
}

extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) {
// Profile-section destruction callback. Intentionally a no-op: nothing is
// removed or released here. The parameter name is commented out, like the
// unused parameters of the other callbacks in this file, so that it does not
// trigger unused-parameter warnings.
void kokkosp_destroy_profile_section(const uint32_t /*sID*/) {
  // do nothing
}

extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
uint64_t* fID) {
// Profile-event callback: emits a roctx marker labelled with `name`.
void kokkosp_profile_event(const char* name) {
  roctxMark(name);
}

// Fence-begin callback: starts a roctx range labelled with `name` and stores
// the returned range id in `*fID`, which kokkosp_end_fence later receives.
// The device ID is not used.
void kokkosp_begin_fence(const char* name,
                         const uint32_t /*devID*/,
                         uint64_t* fID) {
  const auto range_id = roctxRangeStart(name);
  *fID = range_id;
}

extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }
// Fence-end callback: stops the roctx range whose id is `fID` (the value
// kokkosp_begin_fence wrote to its `fID` output).
void kokkosp_end_fence(const uint64_t fID) {
  roctxRangeStop(fID);
}

// Builds the EventSet describing this connector: every callback implemented
// in this namespace is registered, and all other entries are left zeroed.
Kokkos::Tools::Experimental::EventSet get_event_set() {
  Kokkos::Tools::Experimental::EventSet events;
  // Zero the whole struct first so any pointer not assigned below is null.
  memset(&events, 0, sizeof(events));

  // Tool configuration and library lifetime.
  events.request_tool_settings = kokkosp_request_tool_settings;
  events.init                  = kokkosp_init_library;
  events.finalize              = kokkosp_finalize_library;

  // Parallel dispatch (begin/end pairs).
  events.begin_parallel_for    = kokkosp_begin_parallel_for;
  events.end_parallel_for      = kokkosp_end_parallel_for;
  events.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
  events.end_parallel_reduce   = kokkosp_end_parallel_reduce;
  events.begin_parallel_scan   = kokkosp_begin_parallel_scan;
  events.end_parallel_scan     = kokkosp_end_parallel_scan;

  // Regions.
  events.push_region = kokkosp_push_profile_region;
  events.pop_region  = kokkosp_pop_profile_region;

  // Profile sections.
  events.create_profile_section  = kokkosp_create_profile_section;
  events.start_profile_section   = kokkosp_start_profile_section;
  events.stop_profile_section    = kokkosp_stop_profile_section;
  events.destroy_profile_section = kokkosp_destroy_profile_section;

  // Events and fences.
  events.profile_event = kokkosp_profile_event;
  events.begin_fence   = kokkosp_begin_fence;
  events.end_fence     = kokkosp_end_fence;

  return events;
}

} // namespace ROCTXConnector
} // namespace KokkosTools

// C-linkage section: hands each connector callback to the matching EXPOSE_*
// macro. The macros are defined outside this view (presumably in
// kp_core.hpp) and are assumed to emit the C-named kokkosp_* entry points
// that forward to the namespaced implementations -- TODO confirm.
extern "C" {

namespace impl = KokkosTools::ROCTXConnector;

// None of the invocations takes a trailing semicolon: the first fifteen
// already compiled without one, so a semicolon after the last three was only
// a redundant empty declaration.
EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section)
EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section)
EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section)
EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section)
EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event)
EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence)
EXPOSE_END_FENCE(impl::kokkosp_end_fence)
}  // extern "C"

0 comments on commit 62f3f02

Please sign in to comment.