Skip to content

Commit 2ddedef

Browse files
authored
Merge pull request #209 from vlkale/fenceOnSampleOnly
Fence on sample only
2 parents c3e85a6 + 1f0adb4 commit 2ddedef

File tree

3 files changed

+66
-6
lines changed

3 files changed

+66
-6
lines changed

common/kokkos-sampler/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CXX = clang++
1+
CXX = g++
22

33
CXXFLAGS = -O3 -std=c++17 -g
44

common/kokkos-sampler/kp_sampler_skip.cpp

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,47 @@ static endFunction endReduceCallee = NULL;
3333

3434
// Tool-settings callback. In the post-change version shown below (lines marked
// "+"), requires_global_fencing is set to false unconditionally; the removed
// lines (marked "-") previously chose the value based on tool_globFence.
// When tool_globFence is set, the kernel callbacks later in this diff call
// invoke_ktools_fence explicitly instead.
void kokkosp_request_tool_settings(const uint32_t,
3535
Kokkos_Tools_ToolSettings* settings) {
36-
if (0 == tool_globFence) {
37-
settings->requires_global_fencing = false;
36+
settings->requires_global_fencing = false;
37+
}
38+
39+
// set of functions from Kokkos ToolProgrammingInterface (includes fence)
40+
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;
41+
42+
// Returns the 7-bit "device" field of a packed device identifier: the low
// 17 "instance" bits are shifted out and the next 7 bits are kept.
uint32_t getDeviceID(uint32_t devid_in) {
  const int instance_bit_count = 17;
  const int device_bit_count   = 7;

  // Discard the instance bits so the device field sits at bit 0.
  const uint32_t without_instance = devid_in >> instance_bit_count;
  // Mask with the low `device_bit_count` bits set (0x7F).
  const uint32_t device_mask = ~(uint32_t(-1) << device_bit_count);

  return device_mask & without_instance;
}
48+
49+
void invoke_ktools_fence(uint32_t devID) {
50+
if (tpi_funcs.fence != nullptr) {
51+
tpi_funcs.fence(devID);
52+
if (tool_verbosity > 1) {
53+
printf(
54+
"KokkosP: Sampler utility sucessfully invoked "
55+
" tool-induced fence on device %d\n",
56+
getDeviceID(devID));
57+
}
3858
} else {
39-
settings->requires_global_fencing = true;
59+
printf(
60+
"KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
61+
"Fence is NULL!\n");
62+
exit(-1);
4063
}
4164
}
4265

66+
void kokkosp_provide_tool_programming_interface(
67+
uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
68+
if (!num_funcs) {
69+
if (tool_verbosity > 0)
70+
printf(
71+
"KokkosP: Note: Number of functions in Tools Programming Interface "
72+
"is 0!\n");
73+
}
74+
tpi_funcs = *funcsFromTPI;
75+
}
76+
4377
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
4478
const uint32_t devInfoCount, void* deviceInfo) {
4579
const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
@@ -164,6 +198,9 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
164198
printf("KokkosP: sample %llu calling child-begin function...\n",
165199
(unsigned long long)(*kID));
166200
}
201+
if (tool_globFence) {
202+
invoke_ktools_fence(0);
203+
}
167204
if (NULL != beginForCallee) {
168205
uint64_t nestedkID = 0;
169206
(*beginForCallee)(name, devID, &nestedkID);
@@ -180,6 +217,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
180217
printf("KokkosP: sample %llu calling child-end function...\n",
181218
(unsigned long long)(kID));
182219
}
220+
if (tool_globFence) {
221+
invoke_ktools_fence(0);
222+
}
183223
(*endForCallee)(retrievedNestedkID);
184224
infokIDSample.erase(kID);
185225
}
@@ -198,6 +238,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
198238
}
199239
if (NULL != beginScanCallee) {
200240
uint64_t nestedkID = 0;
241+
if (tool_globFence) {
242+
invoke_ktools_fence(0);
243+
}
201244
(*beginScanCallee)(name, devID, &nestedkID);
202245
infokIDSample.insert({*kID, nestedkID});
203246
}
@@ -212,6 +255,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
212255
printf("KokkosP: sample %llu calling child-end function...\n",
213256
(unsigned long long)(kID));
214257
}
258+
if (tool_globFence) {
259+
invoke_ktools_fence(0);
260+
}
215261
(*endScanCallee)(retrievedNestedkID);
216262
infokIDSample.erase(kID);
217263
}
@@ -228,9 +274,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
228274
printf("KokkosP: sample %llu calling child-begin function...\n",
229275
(unsigned long long)(*kID));
230276
}
231-
232277
if (NULL != beginReduceCallee) {
233278
uint64_t nestedkID = 0;
279+
if (tool_globFence) {
280+
invoke_ktools_fence(0);
281+
}
234282
(*beginReduceCallee)(name, devID, &nestedkID);
235283
infokIDSample.insert({*kID, nestedkID});
236284
}
@@ -245,6 +293,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
245293
printf("KokkosP: sample %llu calling child-end function...\n",
246294
(unsigned long long)(kID));
247295
}
296+
if (tool_globFence) {
297+
invoke_ktools_fence(0);
298+
}
248299
(*endScanCallee)(retrievedNestedkID);
249300
infokIDSample.erase(kID);
250301
}
@@ -257,8 +308,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
257308
extern "C" {
258309

259310
namespace impl = KokkosTools::Sampler;
260-
261311
EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
312+
EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
313+
impl::kokkosp_provide_tool_programming_interface)
262314
EXPOSE_INIT(impl::kokkosp_init_library)
263315
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
264316
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)

profiling/all/kp_core.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle;
4848
#define EXPOSE_PROFILE_EVENT(FUNC_NAME)
4949
#define EXPOSE_BEGIN_FENCE(FUNC_NAME)
5050
#define EXPOSE_END_FENCE(FUNC_NAME)
51+
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)
5152

5253
#else
5354

55+
// Expands to a weak-symbol definition of
// kokkosp_provide_tool_programming_interface(num_actions, ptpi) that simply
// forwards both arguments to FUNC_NAME.
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \
56+
__attribute__((weak)) void kokkosp_provide_tool_programming_interface( \
57+
const uint32_t num_actions, \
58+
Kokkos_Tools_ToolProgrammingInterface* ptpi) { \
59+
FUNC_NAME(num_actions, ptpi); \
60+
}
61+
5462
#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \
5563
__attribute__((weak)) void kokkosp_request_tool_settings( \
5664
const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \

0 commit comments

Comments
 (0)