Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use eventset in roctxconnector (as in nvtxconnector) #219

Merged
merged 2 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ endif()
include(cmake/configure_variorum.cmake)

set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER})
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available

if(DEFINED ENV{VTUNE_HOME})
set(VTune_ROOT $ENV{VTUNE_HOME})
Expand Down
1 change: 1 addition & 0 deletions common/kp_config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define USE_MPI @KOKKOSTOOLS_HAS_MPI@

#cmakedefine KOKKOSTOOLS_HAS_NVTX
#cmakedefine KOKKOSTOOLS_HAS_ROCTX
#cmakedefine KOKKOSTOOLS_HAS_CALIPER
#cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP
#cmakedefine KOKKOSTOOLS_HAS_VARIORUM
Expand Down
3 changes: 3 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ if(KOKKOSTOOLS_HAS_NVTX)
add_kp_test(nvtx_connector "nvtx-connector")
add_kp_test(nvtx_focused_connector "nvtx-focused-connector")
endif()
if(KOKKOSTOOLS_HAS_ROCTX)
add_kp_test(roctx_connector "roctx-connector")
endif()
6 changes: 6 additions & 0 deletions profiling/all/kp_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_CALIPER
namespace cali {
extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set(
Expand Down Expand Up @@ -93,6 +96,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) {
#ifdef KOKKOSTOOLS_HAS_NVTX
handlers["nvtx-connector"] = NVTXConnector::get_event_set();
handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set();
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
handlers["roctx-connector"] = ROCTXConnector::get_event_set();
#endif
auto e = handlers.find(profiler);
if (e != handlers.end()) return e->second;
Expand Down
136 changes: 94 additions & 42 deletions profiling/roctx-connector/kp_roctx_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <string>
#include <vector>

#include "kp_core.hpp"

namespace {
struct Section {
std::string label;
Expand All @@ -29,20 +31,28 @@ struct Section {
std::vector<Section> kokkosp_sections;
} // namespace

struct Kokkos_Tools_ToolSettings {
bool requires_global_fencing;
bool padding[255];
};
namespace KokkosTools {
namespace ROCTXConnector {

static bool tool_globfences;

extern "C" void kokkosp_request_tool_settings(
const uint32_t, Kokkos_Tools_ToolSettings* settings) {
settings->requires_global_fencing = false;
void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings* settings) {
if (tool_globfences) {
settings->requires_global_fencing = true;
} else {
settings->requires_global_fencing = false;
}
}

extern "C" void kokkosp_init_library(const int loadSeq,
const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
void* /*deviceInfo*/) {
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES");
if (tool_global_fences) {
tool_globfences = (atoi(tool_global_fences) != 0);
}

std::cout << "-----------------------------------------------------------\n"
<< "KokkosP: ROC Tracer Connector (sequence is " << loadSeq
<< ", version: " << interfaceVer << ")\n"
Expand All @@ -51,7 +61,7 @@ extern "C" void kokkosp_init_library(const int loadSeq,
roctxMark("Kokkos::Initialization Complete");
}

extern "C" void kokkosp_finalize_library() {
void kokkosp_finalize_library() {
std::cout << R"(
-----------------------------------------------------------
KokkosP: Finalization of ROC Tracer Connector. Complete.
Expand All @@ -61,66 +71,108 @@ KokkosP: Finalization of ROC Tracer Connector. Complete.
roctxMark("Kokkos::Finalization Complete");
}

extern "C" void kokkosp_begin_parallel_for(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
roctxRangePop();
}
void kokkosp_end_parallel_for(const uint64_t /*kID*/) { roctxRangePop(); }

extern "C" void kokkosp_begin_parallel_scan(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
roctxRangePop();
}
void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { roctxRangePop(); }

extern "C" void kokkosp_begin_parallel_reduce(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
roctxRangePop();
}
void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { roctxRangePop(); }

extern "C" void kokkosp_push_profile_region(char* name) {
roctxRangePush(name);
}
void kokkosp_push_profile_region(const char* name) { roctxRangePush(name); }

extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); }
void kokkosp_pop_profile_region() { roctxRangePop(); }

extern "C" void kokkosp_create_profile_section(const char* name,
uint32_t* sID) {
void kokkosp_create_profile_section(const char* name, uint32_t* sID) {
*sID = kokkosp_sections.size();
kokkosp_sections.push_back(
{std::string(name), static_cast<roctx_range_id_t>(-1)});
}

extern "C" void kokkosp_start_profile_section(const uint32_t sID) {
void kokkosp_start_profile_section(const uint32_t sID) {
auto& section = kokkosp_sections[sID];
section.id = roctxRangeStart(section.label.c_str());
}

extern "C" void kokkosp_stop_profile_section(const uint32_t sID) {
void kokkosp_stop_profile_section(const uint32_t sID) {
auto const& section = kokkosp_sections[sID];
roctxRangeStop(section.id);
}

extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) {
void kokkosp_destroy_profile_section(const uint32_t sID) {
// do nothing
}

extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
uint64_t* fID) {
void kokkosp_profile_event(const char* name) { roctxMark(name); }

void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
uint64_t* fID) {
*fID = roctxRangeStart(name);
}

extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }
void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }

Kokkos::Tools::Experimental::EventSet get_event_set() {
Kokkos::Tools::Experimental::EventSet my_event_set;
memset(&my_event_set, 0,
sizeof(my_event_set)); // zero any pointers not set here
Comment on lines +130 to +131
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uggg. That thing really something that should be handled at construction.
I suspect value-initialization

EventSet my_event_set{};

does what you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed :) I used this piece of code because all connectors are currently doing it this way. It may be best to address the issue for all connectors in a separate PR? One way forward might be to provide default initialisers using = NULL for the function pointers in the declaration of the struct in Kokkos_Profiling_C_interface.h?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used this piece of code because all connectors are currently doing it this way.

You are right. This is fine then.

And yes definitely fix elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my_event_set.request_tool_settings = kokkosp_request_tool_settings;
my_event_set.init = kokkosp_init_library;
my_event_set.finalize = kokkosp_finalize_library;
my_event_set.push_region = kokkosp_push_profile_region;
my_event_set.pop_region = kokkosp_pop_profile_region;
my_event_set.begin_parallel_for = kokkosp_begin_parallel_for;
my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan;
my_event_set.end_parallel_for = kokkosp_end_parallel_for;
my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce;
my_event_set.end_parallel_scan = kokkosp_end_parallel_scan;
my_event_set.create_profile_section = kokkosp_create_profile_section;
my_event_set.start_profile_section = kokkosp_start_profile_section;
my_event_set.stop_profile_section = kokkosp_stop_profile_section;
my_event_set.destroy_profile_section = kokkosp_destroy_profile_section;
my_event_set.profile_event = kokkosp_profile_event;
my_event_set.begin_fence = kokkosp_begin_fence;
my_event_set.end_fence = kokkosp_end_fence;
return my_event_set;
}

} // namespace ROCTXConnector
} // namespace KokkosTools

extern "C" {

namespace impl = KokkosTools::ROCTXConnector;

EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section)
EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section)
EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section)
EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section)
EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event);
EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence);
EXPOSE_END_FENCE(impl::kokkosp_end_fence);
} // extern "C"