From 9b5a4bc880f594891e7fb95ce4e4e57947a03632 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 18 Nov 2023 18:04:57 +0100 Subject: [PATCH] [HIPIFY][#675][#677][SOLVER][feature] `cuSOLVER` support - Step 11 - Functions (DN) + `cusolverDn(G|S)etStream` are `SUPPORTED` + `cusolverDn(G|S)SetDeterministicMode`, `cusolverDnIRSParams(Create|Destroy)`, `cusolverDnIRSParams*` are `UNSUPPORTED` + Updated `SOLVER` synthetic tests, the regenerated hipify-perl, and `SOLVER` `CUDA2HIP` documentation --- bin/hipify-perl | 21 ++++++++++++ docs/tables/CUSOLVER_API_supported_by_HIP.md | 17 ++++++++++ .../CUSOLVER_API_supported_by_HIP_and_ROC.md | 17 ++++++++++ docs/tables/CUSOLVER_API_supported_by_ROC.md | 17 ++++++++++ src/CUDA2HIP_SOLVER_API_functions.cpp | 34 +++++++++++++++++++ .../synthetic/libraries/cusolver2hipsolver.cu | 13 +++++++ .../synthetic/libraries/cusolver2rocsolver.cu | 13 +++++++ 7 files changed, 132 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 29c19b81..18105ada 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1083,7 +1083,9 @@ my %experimental_funcs = ( "cusolverDnSgetrs" => "6.1.0", "cusolverDnSgetrf_bufferSize" => "6.1.0", "cusolverDnSgetrf" => "6.1.0", + "cusolverDnSetStream" => "6.1.0", "cusolverDnHandle_t" => "6.1.0", + "cusolverDnGetStream" => "6.1.0", "cusolverDnDgetrs" => "6.1.0", "cusolverDnDgetrf_bufferSize" => "6.1.0", "cusolverDnDgetrf" => "6.1.0", @@ -1249,6 +1251,8 @@ sub experimentalSubstitutions { subst("cusolverDnDgetrf", "hipsolverDnDgetrf", "library"); subst("cusolverDnDgetrf_bufferSize", "hipsolverDnDgetrf_bufferSize", "library"); subst("cusolverDnDgetrs", "hipsolverDnDgetrs", "library"); + subst("cusolverDnGetStream", "hipsolverGetStream", "library"); + subst("cusolverDnSetStream", "hipsolverSetStream", "library"); subst("cusolverDnSgetrf", "hipsolverDnSgetrf", "library"); subst("cusolverDnSgetrf_bufferSize", "hipsolverDnSgetrf_bufferSize", "library"); subst("cusolverDnSgetrs", "hipsolverDnSgetrs", "library"); @@ -1747,6 +1751,8 @@ sub rocSubstitutions { subst("cudnnTransformTensor", "miopenTransformTensor", "library"); subst("cusolverDnCreate", "rocblas_create_handle", "library"); subst("cusolverDnDestroy", "rocblas_destroy_handle", "library"); + subst("cusolverDnGetStream", "rocblas_get_stream", "library"); + subst("cusolverDnSetStream", "rocblas_set_stream", "library"); subst("cusparseAxpby", "rocsparse_axpby", "library"); subst("cusparseBlockedEllGet", "rocsparse_bell_get", "library"); subst("cusparseCbsr2csr", "rocsparse_cbsr2csr", "library"); @@ -7077,13 +7083,28 @@ sub warnUnsupportedFunctions { "cusolverDnXgetrs", "cusolverDnXgetrf_bufferSize", "cusolverDnXgetrf", + "cusolverDnSetDeterministicMode", "cusolverDnSetAdvOptions", "cusolverDnParams_t", "cusolverDnParams", "cusolverDnIRSParams_t", + "cusolverDnIRSParamsSetTolInner", + "cusolverDnIRSParamsSetTol", + "cusolverDnIRSParamsSetSolverPrecisions", + "cusolverDnIRSParamsSetSolverMainPrecision", + "cusolverDnIRSParamsSetSolverLowestPrecision", + "cusolverDnIRSParamsSetRefinementSolver", + "cusolverDnIRSParamsSetMaxItersInner", + "cusolverDnIRSParamsSetMaxIters", + "cusolverDnIRSParamsGetMaxIters", + "cusolverDnIRSParamsEnableFallback", + "cusolverDnIRSParamsDisableFallback", + "cusolverDnIRSParamsDestroy", + "cusolverDnIRSParamsCreate", "cusolverDnIRSParams", "cusolverDnIRSInfos_t", "cusolverDnIRSInfos", + "cusolverDnGetDeterministicMode", "cusolverDnFunction_t", "cusolverDnCreateParams", "cusolverDnContext", diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP.md b/docs/tables/CUSOLVER_API_supported_by_HIP.md index c092e782..a7cf83e8 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP.md @@ -113,7 +113,24 @@ |`cusolverDnDgetrf`| | | | |`hipsolverDnDgetrf`|5.1.0| | | |6.1.0| |`cusolverDnDgetrf_bufferSize`| | | | |`hipsolverDnDgetrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnDgetrs`| | | | |`hipsolverDnDgetrs`|5.1.0| | | |6.1.0| +|`cusolverDnGetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnGetStream`| | | | |`hipsolverGetStream`|4.5.0| | | |6.1.0| +|`cusolverDnIRSParamsCreate`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsDestroy`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsDisableFallback`|11.0| | | | | | | | | | +|`cusolverDnIRSParamsEnableFallback`|11.0| | | | | | | | | | +|`cusolverDnIRSParamsGetMaxIters`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetMaxIters`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetMaxItersInner`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetRefinementSolver`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverLowestPrecision`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverMainPrecision`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverPrecisions`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetTol`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetTolInner`|10.2| | | | | | | | | | |`cusolverDnSetAdvOptions`|11.0| | | | | | | | | | +|`cusolverDnSetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnSetStream`| | | | |`hipsolverSetStream`|4.5.0| | | |6.1.0| |`cusolverDnSgetrf`| | | | |`hipsolverDnSgetrf`|5.1.0| | | |6.1.0| |`cusolverDnSgetrf_bufferSize`| | | | |`hipsolverDnSgetrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnSgetrs`| | | | |`hipsolverDnSgetrs`|5.1.0| | | |6.1.0| diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md index 5a7b9cb0..dc94d31a 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md @@ -113,7 +113,24 @@ |`cusolverDnDgetrf`| | | | |`hipsolverDnDgetrf`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgetrf_bufferSize`| | | | |`hipsolverDnDgetrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgetrs`| | | | |`hipsolverDnDgetrs`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnGetDeterministicMode`|12.2| | | | | | | | | | | | | | | | +|`cusolverDnGetStream`| | | | |`hipsolverGetStream`|4.5.0| | | |6.1.0|`rocblas_get_stream`| | | | | | +|`cusolverDnIRSParamsCreate`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsDestroy`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsDisableFallback`|11.0| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsEnableFallback`|11.0| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsGetMaxIters`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetMaxIters`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetMaxItersInner`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetRefinementSolver`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetSolverLowestPrecision`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetSolverMainPrecision`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetSolverPrecisions`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetTol`|10.2| | | | | | | | | | | | | | | | +|`cusolverDnIRSParamsSetTolInner`|10.2| | | | | | | | | | | | | | | | |`cusolverDnSetAdvOptions`|11.0| | | | | | | | | | | | | | | | +|`cusolverDnSetDeterministicMode`|12.2| | | | | | | | | | | | | | | | +|`cusolverDnSetStream`| | | | |`hipsolverSetStream`|4.5.0| | | |6.1.0|`rocblas_set_stream`| | | | | | |`cusolverDnSgetrf`| | | | |`hipsolverDnSgetrf`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgetrf_bufferSize`| | | | |`hipsolverDnSgetrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgetrs`| | | | |`hipsolverDnSgetrs`|5.1.0| | | |6.1.0| | | | | | | diff --git a/docs/tables/CUSOLVER_API_supported_by_ROC.md b/docs/tables/CUSOLVER_API_supported_by_ROC.md index 499f14a3..d3cbe321 100644 --- a/docs/tables/CUSOLVER_API_supported_by_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_ROC.md @@ -113,7 +113,24 @@ |`cusolverDnDgetrf`| | | | | | | | | | | |`cusolverDnDgetrf_bufferSize`| | | | | | | | | | | |`cusolverDnDgetrs`| | | | | | | | | | | +|`cusolverDnGetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnGetStream`| | | | |`rocblas_get_stream`| | | | | | +|`cusolverDnIRSParamsCreate`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsDestroy`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsDisableFallback`|11.0| | | | | | | | | | +|`cusolverDnIRSParamsEnableFallback`|11.0| | | | | | | | | | +|`cusolverDnIRSParamsGetMaxIters`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetMaxIters`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetMaxItersInner`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetRefinementSolver`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverLowestPrecision`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverMainPrecision`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetSolverPrecisions`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetTol`|10.2| | | | | | | | | | +|`cusolverDnIRSParamsSetTolInner`|10.2| | | | | | | | | | |`cusolverDnSetAdvOptions`|11.0| | | | | | | | | | +|`cusolverDnSetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnSetStream`| | | | |`rocblas_set_stream`| | | | | | |`cusolverDnSgetrf`| | | | | | | | | | | |`cusolverDnSgetrf_bufferSize`| | | | | | | | | | | |`cusolverDnSgetrs`| | | | | | | | | | | diff --git a/src/CUDA2HIP_SOLVER_API_functions.cpp b/src/CUDA2HIP_SOLVER_API_functions.cpp index 8a498b94..e93c1da5 100644 --- a/src/CUDA2HIP_SOLVER_API_functions.cpp +++ b/src/CUDA2HIP_SOLVER_API_functions.cpp @@ -43,6 +43,23 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnXgetrs", {"hipsolverDnXgetrs", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnCreateParams", {"hipsolverDnCreateParams", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnSetAdvOptions", {"hipsolverDnSetAdvOptions", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnSetStream", {"hipsolverSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_SOLVER, 2, HIP_EXPERIMENTAL}}, + {"cusolverDnGetStream", {"hipsolverGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_SOLVER, 2, HIP_EXPERIMENTAL}}, + {"cusolverDnSetDeterministicMode", {"hipsolverDnSetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnGetDeterministicMode", {"hipsolverDnGetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsCreate", {"hipsolverDnIRSParamsCreate", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsDestroy", {"hipsolverDnIRSParamsDestroy", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetRefinementSolver", {"hipsolverDnIRSParamsSetRefinementSolver", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetSolverMainPrecision", {"hipsolverDnIRSParamsSetSolverMainPrecision", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetSolverLowestPrecision", {"hipsolverDnIRSParamsSetSolverLowestPrecision", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetSolverPrecisions", {"hipsolverDnIRSParamsSetSolverPrecisions", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetTol", {"hipsolverDnIRSParamsSetTol", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetTolInner", {"hipsolverDnIRSParamsSetTolInner", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetMaxIters", {"hipsolverDnIRSParamsSetMaxIters", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsSetMaxItersInner", {"hipsolverDnIRSParamsSetMaxItersInner", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsGetMaxIters", {"hipsolverDnIRSParamsGetMaxIters", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsEnableFallback", {"hipsolverDnIRSParamsEnableFallback", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnIRSParamsDisableFallback", {"hipsolverDnIRSParamsDisableFallback", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, }; const std::map CUDA_SOLVER_FUNCTION_VER_MAP { @@ -51,6 +68,21 @@ const std::map CUDA_SOLVER_FUNCTION_VER_MAP { {"cusolverDnXgetrf", {CUDA_111, CUDA_0, CUDA_0}}, {"cusolverDnXgetrf_bufferSize", {CUDA_111, CUDA_0, CUDA_0}}, {"cusolverDnXgetrs", {CUDA_111, CUDA_0, CUDA_0}}, + {"cusolverDnSetDeterministicMode", {CUDA_122, CUDA_0, CUDA_0}}, + {"cusolverDnGetDeterministicMode", {CUDA_122, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsCreate", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsDestroy", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetRefinementSolver", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetSolverMainPrecision", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetSolverLowestPrecision", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetSolverPrecisions", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetTol", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetTolInner", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetMaxIters", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsSetMaxItersInner", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsGetMaxIters", {CUDA_102, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsEnableFallback", {CUDA_110, CUDA_0, CUDA_0}}, + {"cusolverDnIRSParamsDisableFallback", {CUDA_110, CUDA_0, CUDA_0}}, }; const std::map HIP_SOLVER_FUNCTION_VER_MAP { @@ -62,6 +94,8 @@ const std::map HIP_SOLVER_FUNCTION_VER_MAP { {"hipsolverDnSgetrf_bufferSize", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnDgetrs", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnSgetrs", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverSetStream", {HIP_4050, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverGetStream", {HIP_4050, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_SOLVER_API_SECTION_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu index cbb76a95..0c6537b7 100644 --- a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu +++ b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu @@ -53,6 +53,9 @@ int main() { // CHECK: hipblasOperation_t blasOperation; cublasOperation_t blasOperation; + // CHECK: hipStream_t stream_t; + cudaStream_t stream_t; + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnCreate(cusolverDnHandle_t *handle); // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnCreate(hipsolverHandle_t* handle); // CHECK: status = hipsolverDnCreate(&handle); @@ -93,6 +96,16 @@ int main() { // CHECK: status = hipsolverDnSgetrs(handle, blasOperation, n, nrhs , &fA, lda, &devIpiv, &fB, ldb, &devInfo); status = cusolverDnSgetrs(handle, blasOperation, n, nrhs , &fA, lda, &devIpiv, &fB, ldb, &devInfo); + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, cudaStream_t streamId); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t streamId); + // CHECK: status = hipsolverSetStream(handle, stream_t); + status = cusolverDnSetStream(handle, stream_t); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, cudaStream_t *streamId); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t* streamId); + // CHECK: status = hipsolverGetStream(handle, &stream_t); + status = cusolverDnGetStream(handle, &stream_t); + #if CUDA_VERSION >= 8000 // CHECK: hipsolverEigType_t eigType; // CHECK-NEXT: hipsolverEigType_t EIG_TYPE_1 = HIPSOLVER_EIG_TYPE_1; diff --git a/tests/unit_tests/synthetic/libraries/cusolver2rocsolver.cu b/tests/unit_tests/synthetic/libraries/cusolver2rocsolver.cu index a32afcd7..7899f676 100644 --- a/tests/unit_tests/synthetic/libraries/cusolver2rocsolver.cu +++ b/tests/unit_tests/synthetic/libraries/cusolver2rocsolver.cu @@ -12,6 +12,9 @@ int main() { // CHECK: rocblas_handle handle; cusolverDnHandle_t handle; + // CHECK: hipStream_t stream_t; + cudaStream_t stream_t; + // CHECK: rocblas_status status; // CHECK-NEXT: rocblas_status STATUS_SUCCESS = rocblas_status_success; // CHECK-NEXT: rocblas_status STATUS_NOT_INITIALIZED = rocblas_status_invalid_handle; @@ -45,6 +48,16 @@ int main() { // CHECK: status = rocblas_destroy_handle(handle); status = cusolverDnDestroy(handle); + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSetStream(cusolverDnHandle_t handle, cudaStream_t streamId); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_stream(rocblas_handle handle, hipStream_t stream); + // CHECK: status = rocblas_set_stream(handle, stream_t); + status = cusolverDnSetStream(handle, stream_t); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnGetStream(cusolverDnHandle_t handle, cudaStream_t *streamId); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_stream(rocblas_handle handle, hipStream_t* stream); + // CHECK: status = rocblas_get_stream(handle, &stream_t); + status = cusolverDnGetStream(handle, &stream_t); + #if CUDA_VERSION >= 8000 // CHECK: rocblas_eform eigType; // CHECK-NEXT: rocblas_eform EIG_TYPE_1 = rocblas_eform_ax;