Skip to content

Commit

Permalink
ocl: minor corrections (#844)
Browse files Browse the repository at this point in the history
* Apply rank-aware device-id only if ACC_OPENCL_ACTIVATE is defined.
* Forward the error code and clean up (c_dbcsr_acc_set_active_device).
* More sensible output in case of upfront error.
* Made device selection rank-aware. Cleanup.
  • Loading branch information
hfp authored Sep 12, 2024
1 parent 4c700a6 commit b358e95
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 58 deletions.
24 changes: 14 additions & 10 deletions src/acc/acc_bench_smm.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,21 +222,25 @@ int main(int argc, char* argv[]) {
#endif
CHECK(libsmm_acc_init(), &result, check); /* note: libsmm_acc_init() may imply acc_init() */
if (EXIT_SUCCESS == result) {
const char* const env_device = getenv("DEVICE");
const int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device));
int ndevices = 0;
result = c_dbcsr_acc_get_ndevices(&ndevices);
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
}
else {
if (0 >= ndevices) {
fprintf(stderr, "ERROR: No ACC-device found!\n");
if (EXIT_SUCCESS == result && 0 < ndevices) {
const char* const env_device = getenv("DEVICE");
const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK"));
const int rank = (NULL != env_rank ? atoi(env_rank) : -1);
int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device));
device = ((0 <= device && device < ndevices) ? (0 <= rank ? (rank % ndevices) : device) : -1);
result = c_dbcsr_acc_set_active_device(device);
if (EXIT_SUCCESS == result) {
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
}
else {
fprintf(stderr, "ERROR: Failed to activate device %i of %i!\n", device, ndevices);
fprintf(stderr, "ERROR: Failed to activate device!\n");
}
result = EXIT_FAILURE;
}
else {
fprintf(stderr, "ERROR: No ACC-device found!\n");
if (EXIT_SUCCESS == result) result = EXIT_FAILURE;
}
if (EXIT_SUCCESS == result) {
rnd = (int*)malloc(sizeof(int) * NRAND);
Expand Down
50 changes: 23 additions & 27 deletions src/acc/acc_bench_trans.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,52 +106,48 @@ int main(int argc, char* argv[]) {
#else
const int warmup = 0;
#endif
const char* const env_device = getenv("DEVICE");
const int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device));
int *stack_hst = NULL, *stack_dev = NULL;
ELEM_TYPE *mat_hst = NULL, *mat_dev = NULL;
int result = EXIT_SUCCESS, ndevices = 0, r, i, mm = m, nn = n;
int result = EXIT_SUCCESS, mm = m, nn = n, r, i;
void* stream = NULL;
#if defined(USE_LIBXSMM)
libxsmm_timer_tickint start;
double duration;
#endif
assert(m <= (mn / n) && 0 == (mn % n));
if (MAX_KERNEL_DIM < m || MAX_KERNEL_DIM < n) {
fprintf(stderr, "Matrix shape exceeds MAX_KERNEL_DIM!\n");
result = EXIT_FAILURE;
}
CHECK(c_dbcsr_acc_init(), &result);
/* note: libsmm_acc_init() may imply acc_init() */
CHECK(libsmm_acc_init(), &result);
if (EXIT_SUCCESS == result) {
int ndevices = 0;
result = c_dbcsr_acc_get_ndevices(&ndevices);
if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) {
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
}
else {
if (0 >= ndevices) {
fprintf(stderr, "No ACC-device found!\n");
if (EXIT_SUCCESS == result && 0 < ndevices) {
const char* const env_device = getenv("DEVICE");
const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK"));
const int rank = (NULL != env_rank ? atoi(env_rank) : -1);
int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device));
device = ((0 <= device && device < ndevices) ? (0 <= rank ? (rank % ndevices) : device) : -1);
result = c_dbcsr_acc_set_active_device(device);
if (EXIT_SUCCESS == result) {
printf("Activated device%i (ndevices=%i)\n", device, ndevices);
printf("%s%s%i %i %i %i\n", 0 < argc ? argv[0] : "", 0 < argc ? " " : "", nrepeat, stack_size, m, n);
printf("typename (id=%i): %s\n", DBCSR_TYPE(ELEM_TYPE), DBCSR_STRINGIFY(ELEM_TYPE));
}
else {
fprintf(stderr, "Failed to activate device %i of %i!\n", device, ndevices);
fprintf(stderr, "ERROR: Failed to activate device!\n");
}
#if !defined(__CUDA)
CHECK(libsmm_acc_finalize(), NULL);
#endif
CHECK(c_dbcsr_acc_finalize(), NULL);
return result;
}
else {
fprintf(stderr, "ERROR: No ACC-device found!\n");
if (EXIT_SUCCESS == result) result = EXIT_FAILURE;
}
}
else {
fprintf(stderr, "ACC initialization failed!\n");
#if !defined(__CUDA)
CHECK(libsmm_acc_finalize(), NULL);
#endif
CHECK(c_dbcsr_acc_finalize(), NULL);
return result;
}
printf("%s%s%i %i %i %i\n", 0 < argc ? argv[0] : "", 0 < argc ? " " : "", nrepeat, stack_size, m, n);
printf("typename (id=%i): %s\n", DBCSR_TYPE(ELEM_TYPE), DBCSR_STRINGIFY(ELEM_TYPE));
if (MAX_KERNEL_DIM < m || MAX_KERNEL_DIM < n) {
fprintf(stderr, "Matrix shape exceeds MAX_KERNEL_DIM!\n");
result = EXIT_FAILURE;
}
#if defined(PRIORITY)
CHECK(c_dbcsr_acc_stream_priority_range(&priomin, &priomax), &result);
Expand Down Expand Up @@ -259,7 +255,7 @@ int main(int argc, char* argv[]) {
CHECK(c_dbcsr_acc_finalize(), NULL);
if (EXIT_SUCCESS != result) {
if (-1 != result) {
fprintf(stderr, "FAILED\n");
fprintf(stderr, "\nFAILED\n\n");
}
else {
fprintf(stderr, "Kernel not suitable!\n");
Expand Down
28 changes: 7 additions & 21 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,11 @@ int c_dbcsr_acc_init(void) {
result = c_dbcsr_acc_opencl_set_active_device(NULL /*lock*/, ACC_OPENCL_ACTIVATE);
}
else {
const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK"));
const int rank = (NULL != env_rank ? atoi(env_rank) : 0);
if (0 < rank && 1 < c_dbcsr_acc_opencl_config.ndevices) {
device_id = rank % c_dbcsr_acc_opencl_config.ndevices;
}
result = c_dbcsr_acc_opencl_set_active_device(NULL /*lock*/, device_id);
}
# else
Expand Down Expand Up @@ -1002,17 +1007,8 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
int result = EXIT_SUCCESS;
assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS);
if (0 <= device_id && device_id < c_dbcsr_acc_opencl_config.ndevices) {
cl_device_id active_id = NULL;
static int rank = -1;
if (0 > rank) {
const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK"));
rank = (NULL != env_rank ? atoi(env_rank) : 0);
}
if (0 < rank && 1 < c_dbcsr_acc_opencl_config.ndevices) {
device_id = (device_id + rank) % c_dbcsr_acc_opencl_config.ndevices;
}
/* accessing devices is thread-safe (array is fixed after initialization) */
active_id = c_dbcsr_acc_opencl_config.devices[device_id];
const cl_device_id active_id = c_dbcsr_acc_opencl_config.devices[device_id];
if (NULL != active_id) {
cl_device_id context_id = NULL;
cl_context context = NULL;
Expand Down Expand Up @@ -1172,13 +1168,8 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i


int c_dbcsr_acc_set_active_device(int device_id) {
/* avoid ACC_OPENCL_PROFILE in this routine */
int result = EXIT_SUCCESS;
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE) && 0
int routine_handle;
static const char* const routine_name_ptr = LIBXSMM_FUNCNAME;
static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1;
c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle);
# endif
if (0 <= device_id && device_id < c_dbcsr_acc_opencl_config.ndevices) {
# if defined(ACC_OPENCL_CACHE_DID)
if (c_dbcsr_acc_opencl_active_id != (device_id + 1))
Expand All @@ -1190,12 +1181,7 @@ int c_dbcsr_acc_set_active_device(int device_id) {
# endif
}
}
# if !defined(NDEBUG)
else result = EXIT_FAILURE;
# endif
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE) && 0
c_dbcsr_timestop(&routine_handle);
# endif
ACC_OPENCL_RETURN(result);
}

Expand Down

0 comments on commit b358e95

Please sign in to comment.