diff --git a/src/acc/acc_bench_smm.c b/src/acc/acc_bench_smm.c index 61bfba29927..26bd167ca41 100644 --- a/src/acc/acc_bench_smm.c +++ b/src/acc/acc_bench_smm.c @@ -222,21 +222,25 @@ int main(int argc, char* argv[]) { #endif CHECK(libsmm_acc_init(), &result, check); /* note: libsmm_acc_init() may imply acc_init() */ if (EXIT_SUCCESS == result) { - const char* const env_device = getenv("DEVICE"); - const int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device)); int ndevices = 0; result = c_dbcsr_acc_get_ndevices(&ndevices); - if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) { - printf("Activated device%i (ndevices=%i)\n", device, ndevices); - } - else { - if (0 >= ndevices) { - fprintf(stderr, "ERROR: No ACC-device found!\n"); + if (EXIT_SUCCESS == result && 0 < ndevices) { + const char* const env_device = getenv("DEVICE"); + const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK")); + const int rank = (NULL != env_rank ? atoi(env_rank) : -1); + int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device)); + device = ((0 <= device && device < ndevices) ? (0 <= rank ? (rank % ndevices) : device) : -1); + result = c_dbcsr_acc_set_active_device(device); + if (EXIT_SUCCESS == result) { + printf("Activated device%i (ndevices=%i)\n", device, ndevices); } else { - fprintf(stderr, "ERROR: Failed to activate device %i of %i!\n", device, ndevices); + fprintf(stderr, "ERROR: Failed to activate device!\n"); } - result = EXIT_FAILURE; + } + else { + fprintf(stderr, "ERROR: No ACC-device found!\n"); + if (EXIT_SUCCESS == result) result = EXIT_FAILURE; } if (EXIT_SUCCESS == result) { rnd = (int*)malloc(sizeof(int) * NRAND); diff --git a/src/acc/acc_bench_trans.c b/src/acc/acc_bench_trans.c index 07101a187df..2f9485b839c 100644 --- a/src/acc/acc_bench_trans.c +++ b/src/acc/acc_bench_trans.c @@ -106,52 +106,48 @@ int main(int argc, char* argv[]) { #else const int warmup = 0; #endif - const char* const env_device = getenv("DEVICE"); - const int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device)); int *stack_hst = NULL, *stack_dev = NULL; ELEM_TYPE *mat_hst = NULL, *mat_dev = NULL; - int result = EXIT_SUCCESS, ndevices = 0, r, i, mm = m, nn = n; + int result = EXIT_SUCCESS, mm = m, nn = n, r, i; void* stream = NULL; #if defined(USE_LIBXSMM) libxsmm_timer_tickint start; double duration; #endif assert(m <= (mn / n) && 0 == (mn % n)); + if (MAX_KERNEL_DIM < m || MAX_KERNEL_DIM < n) { + fprintf(stderr, "Matrix shape exceeds MAX_KERNEL_DIM!\n"); + result = EXIT_FAILURE; + } CHECK(c_dbcsr_acc_init(), &result); /* note: libsmm_acc_init() may imply acc_init() */ CHECK(libsmm_acc_init(), &result); if (EXIT_SUCCESS == result) { + int ndevices = 0; result = c_dbcsr_acc_get_ndevices(&ndevices); - if (0 < ndevices && (0 == device || EXIT_SUCCESS == c_dbcsr_acc_set_active_device(device))) { - printf("Activated device%i (ndevices=%i)\n", device, ndevices); - } - else { - if (0 >= ndevices) { - fprintf(stderr, "No ACC-device found!\n"); + if (EXIT_SUCCESS == result && 0 < ndevices) { + const char* const env_device = getenv("DEVICE"); + const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK")); + const int rank = (NULL != env_rank ? atoi(env_rank) : -1); + int device = ((NULL == env_device || '\0' == *env_device) ? 0 : atoi(env_device)); + device = ((0 <= device && device < ndevices) ? (0 <= rank ? (rank % ndevices) : device) : -1); + result = c_dbcsr_acc_set_active_device(device); + if (EXIT_SUCCESS == result) { + printf("Activated device%i (ndevices=%i)\n", device, ndevices); + printf("%s%s%i %i %i %i\n", 0 < argc ? argv[0] : "", 0 < argc ? " " : "", nrepeat, stack_size, m, n); + printf("typename (id=%i): %s\n", DBCSR_TYPE(ELEM_TYPE), DBCSR_STRINGIFY(ELEM_TYPE)); } else { - fprintf(stderr, "Failed to activate device %i of %i!\n", device, ndevices); + fprintf(stderr, "ERROR: Failed to activate device!\n"); } -#if !defined(__CUDA) - CHECK(libsmm_acc_finalize(), NULL); -#endif - CHECK(c_dbcsr_acc_finalize(), NULL); - return result; + } + else { + fprintf(stderr, "ERROR: No ACC-device found!\n"); + if (EXIT_SUCCESS == result) result = EXIT_FAILURE; } } else { fprintf(stderr, "ACC initialization failed!\n"); -#if !defined(__CUDA) - CHECK(libsmm_acc_finalize(), NULL); -#endif - CHECK(c_dbcsr_acc_finalize(), NULL); - return result; - } - printf("%s%s%i %i %i %i\n", 0 < argc ? argv[0] : "", 0 < argc ? " " : "", nrepeat, stack_size, m, n); - printf("typename (id=%i): %s\n", DBCSR_TYPE(ELEM_TYPE), DBCSR_STRINGIFY(ELEM_TYPE)); - if (MAX_KERNEL_DIM < m || MAX_KERNEL_DIM < n) { - fprintf(stderr, "Matrix shape exceeds MAX_KERNEL_DIM!\n"); - result = EXIT_FAILURE; } #if defined(PRIORITY) CHECK(c_dbcsr_acc_stream_priority_range(&priomin, &priomax), &result); @@ -259,7 +255,7 @@ int main(int argc, char* argv[]) { CHECK(c_dbcsr_acc_finalize(), NULL); if (EXIT_SUCCESS != result) { if (-1 != result) { - fprintf(stderr, "FAILED\n"); + fprintf(stderr, "\nFAILED\n\n"); } else { fprintf(stderr, "Kernel not suitable!\n"); diff --git a/src/acc/opencl/acc_opencl.c b/src/acc/opencl/acc_opencl.c index 2168c497c97..8d4e42b807f 100644 --- a/src/acc/opencl/acc_opencl.c +++ b/src/acc/opencl/acc_opencl.c @@ -641,6 +641,11 @@ int c_dbcsr_acc_init(void) { result = c_dbcsr_acc_opencl_set_active_device(NULL /*lock*/, ACC_OPENCL_ACTIVATE); } else { + const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK")); + const int rank = (NULL != env_rank ? atoi(env_rank) : 0); + if (0 < rank && 1 < c_dbcsr_acc_opencl_config.ndevices) { + device_id = rank % c_dbcsr_acc_opencl_config.ndevices; + } result = c_dbcsr_acc_opencl_set_active_device(NULL /*lock*/, device_id); } # else @@ -1002,17 +1007,8 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i int result = EXIT_SUCCESS; assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS); if (0 <= device_id && device_id < c_dbcsr_acc_opencl_config.ndevices) { - cl_device_id active_id = NULL; - static int rank = -1; - if (0 > rank) { - const char* const env_rank = (NULL != getenv("PMI_RANK") ? getenv("PMI_RANK") : getenv("OMPI_COMM_WORLD_LOCAL_RANK")); - rank = (NULL != env_rank ? atoi(env_rank) : 0); - } - if (0 < rank && 1 < c_dbcsr_acc_opencl_config.ndevices) { - device_id = (device_id + rank) % c_dbcsr_acc_opencl_config.ndevices; - } /* accessing devices is thread-safe (array is fixed after initialization) */ - active_id = c_dbcsr_acc_opencl_config.devices[device_id]; + const cl_device_id active_id = c_dbcsr_acc_opencl_config.devices[device_id]; if (NULL != active_id) { cl_device_id context_id = NULL; cl_context context = NULL; @@ -1172,13 +1168,8 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i int c_dbcsr_acc_set_active_device(int device_id) { + /* avoid ACC_OPENCL_PROFILE in this routine */ int result = EXIT_SUCCESS; -# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE) && 0 - int routine_handle; - static const char* const routine_name_ptr = LIBXSMM_FUNCNAME; - static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1; - c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle); -# endif if (0 <= device_id && device_id < c_dbcsr_acc_opencl_config.ndevices) { # if defined(ACC_OPENCL_CACHE_DID) if (c_dbcsr_acc_opencl_active_id != (device_id + 1)) @@ -1190,12 +1181,7 @@ int c_dbcsr_acc_set_active_device(int device_id) { # endif } } -# if !defined(NDEBUG) else result = EXIT_FAILURE; -# endif -# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE) && 0 - c_dbcsr_timestop(&routine_handle); -# endif ACC_OPENCL_RETURN(result); }