From 3050a20114105fd7135cace5439974cc2a51bd2c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 19 Dec 2023 15:58:20 +0000 Subject: [PATCH] [HIPIFY][#675][#677][SOLVER][feature] `cuSOLVER` support - Step 47 - Functions (DN) + `cusolverDn(S|D|C|Z)gesvdaStridedBatched(_bufferSize)?` are `SUPPORTED` by `hipSOLVER` only + [NOTE] `rocsolver_(s|d|c|z)gesvdx_strided_batched` have a harness of other `ROC` and `HIP` API calls, thus `UNSUPPORTED` + Updated `SOLVER` synthetic tests, the regenerated `hipify-perl`, and `SOLVER` `CUDA2HIP` documentation --- bin/hipify-perl | 16 +++++ docs/tables/CUSOLVER_API_supported_by_HIP.md | 8 +++ .../CUSOLVER_API_supported_by_HIP_and_ROC.md | 8 +++ docs/tables/CUSOLVER_API_supported_by_ROC.md | 8 +++ src/CUDA2HIP_SOLVER_API_functions.cpp | 26 ++++++++ .../synthetic/libraries/cusolver2hipsolver.cu | 66 +++++++++++++++++-- 6 files changed, 128 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index aa52bece..2c4c769c 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1122,6 +1122,8 @@ my %experimental_funcs = ( "cusolverDnZgesvdjBatched_bufferSize" => "6.1.0", "cusolverDnZgesvdjBatched" => "6.1.0", "cusolverDnZgesvdj" => "6.1.0", + "cusolverDnZgesvdaStridedBatched_bufferSize" => "6.1.0", + "cusolverDnZgesvdaStridedBatched" => "6.1.0", "cusolverDnZgesvd_bufferSize" => "6.1.0", "cusolverDnZgesvd" => "6.1.0", "cusolverDnZgeqrf_bufferSize" => "6.1.0", @@ -1184,6 +1186,8 @@ my %experimental_funcs = ( "cusolverDnSgesvdjBatched_bufferSize" => "6.1.0", "cusolverDnSgesvdjBatched" => "6.1.0", "cusolverDnSgesvdj" => "6.1.0", + "cusolverDnSgesvdaStridedBatched_bufferSize" => "6.1.0", + "cusolverDnSgesvdaStridedBatched" => "6.1.0", "cusolverDnSgesvd_bufferSize" => "6.1.0", "cusolverDnSgesvd" => "6.1.0", "cusolverDnSgeqrf_bufferSize" => "6.1.0", @@ -1239,6 +1243,8 @@ my %experimental_funcs = ( "cusolverDnDgesvdjBatched_bufferSize" => "6.1.0", "cusolverDnDgesvdjBatched" => "6.1.0", "cusolverDnDgesvdj" => "6.1.0", + "cusolverDnDgesvdaStridedBatched_bufferSize" => "6.1.0", + "cusolverDnDgesvdaStridedBatched" => "6.1.0", "cusolverDnDgesvd_bufferSize" => "6.1.0", "cusolverDnDgesvd" => "6.1.0", "cusolverDnDgeqrf_bufferSize" => "6.1.0", @@ -1297,6 +1303,8 @@ my %experimental_funcs = ( "cusolverDnCgesvdjBatched_bufferSize" => "6.1.0", "cusolverDnCgesvdjBatched" => "6.1.0", "cusolverDnCgesvdj" => "6.1.0", + "cusolverDnCgesvdaStridedBatched_bufferSize" => "6.1.0", + "cusolverDnCgesvdaStridedBatched" => "6.1.0", "cusolverDnCgesvd_bufferSize" => "6.1.0", "cusolverDnCgesvd" => "6.1.0", "cusolverDnCgeqrf_bufferSize" => "6.1.0", @@ -1472,6 +1480,8 @@ sub experimentalSubstitutions { subst("cusolverDnCgeqrf_bufferSize", "hipsolverDnCgeqrf_bufferSize", "library"); subst("cusolverDnCgesvd", "hipsolverDnCgesvd", "library"); subst("cusolverDnCgesvd_bufferSize", "hipsolverDnCgesvd_bufferSize", "library"); + subst("cusolverDnCgesvdaStridedBatched", "hipsolverDnCgesvdaStridedBatched", "library"); + subst("cusolverDnCgesvdaStridedBatched_bufferSize", "hipsolverDnCgesvdaStridedBatched_bufferSize", "library"); subst("cusolverDnCgesvdj", "hipsolverDnCgesvdj", "library"); subst("cusolverDnCgesvdjBatched", "hipsolverDnCgesvdjBatched", "library"); subst("cusolverDnCgesvdjBatched_bufferSize", "hipsolverDnCgesvdjBatched_bufferSize", "library"); @@ -1530,6 +1540,8 @@ sub experimentalSubstitutions { subst("cusolverDnDgeqrf_bufferSize", "hipsolverDnDgeqrf_bufferSize", "library"); subst("cusolverDnDgesvd", "hipsolverDnDgesvd", "library"); subst("cusolverDnDgesvd_bufferSize", "hipsolverDnDgesvd_bufferSize", "library"); + subst("cusolverDnDgesvdaStridedBatched", "hipsolverDnDgesvdaStridedBatched", "library"); + subst("cusolverDnDgesvdaStridedBatched_bufferSize", "hipsolverDnDgesvdaStridedBatched_bufferSize", "library"); subst("cusolverDnDgesvdj", "hipsolverDnDgesvdj", "library"); subst("cusolverDnDgesvdjBatched", "hipsolverDnDgesvdjBatched", "library"); subst("cusolverDnDgesvdjBatched_bufferSize", "hipsolverDnDgesvdjBatched_bufferSize", "library"); @@ -1584,6 +1596,8 @@ sub experimentalSubstitutions { subst("cusolverDnSgeqrf_bufferSize", "hipsolverDnSgeqrf_bufferSize", "library"); subst("cusolverDnSgesvd", "hipsolverDnSgesvd", "library"); subst("cusolverDnSgesvd_bufferSize", "hipsolverDnSgesvd_bufferSize", "library"); + subst("cusolverDnSgesvdaStridedBatched", "hipsolverDnSgesvdaStridedBatched", "library"); + subst("cusolverDnSgesvdaStridedBatched_bufferSize", "hipsolverDnSgesvdaStridedBatched_bufferSize", "library"); subst("cusolverDnSgesvdj", "hipsolverDnSgesvdj", "library"); subst("cusolverDnSgesvdjBatched", "hipsolverDnSgesvdjBatched", "library"); subst("cusolverDnSgesvdjBatched_bufferSize", "hipsolverDnSgesvdjBatched_bufferSize", "library"); @@ -1646,6 +1660,8 @@ sub experimentalSubstitutions { subst("cusolverDnZgeqrf_bufferSize", "hipsolverDnZgeqrf_bufferSize", "library"); subst("cusolverDnZgesvd", "hipsolverDnZgesvd", "library"); subst("cusolverDnZgesvd_bufferSize", "hipsolverDnZgesvd_bufferSize", "library"); + subst("cusolverDnZgesvdaStridedBatched", "hipsolverDnZgesvdaStridedBatched", "library"); + subst("cusolverDnZgesvdaStridedBatched_bufferSize", "hipsolverDnZgesvdaStridedBatched_bufferSize", "library"); subst("cusolverDnZgesvdj", "hipsolverDnZgesvdj", "library"); subst("cusolverDnZgesvdjBatched", "hipsolverDnZgesvdjBatched", "library"); subst("cusolverDnZgesvdjBatched_bufferSize", "hipsolverDnZgesvdjBatched_bufferSize", "library"); diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP.md b/docs/tables/CUSOLVER_API_supported_by_HIP.md index 29a6e2c9..ecfde03b 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP.md @@ -130,6 +130,8 @@ |`cusolverDnCgeqrf_bufferSize`| | | | |`hipsolverDnCgeqrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnCgesvd`| | | | |`hipsolverDnCgesvd`|5.1.0| | | |6.1.0| |`cusolverDnCgesvd_bufferSize`| | | | |`hipsolverDnCgesvd_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnCgesvdaStridedBatched`|10.1| | | |`hipsolverDnCgesvdaStridedBatched`|5.4.0| | | |6.1.0| +|`cusolverDnCgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnCgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| |`cusolverDnCgesvdj`|9.0| | | |`hipsolverDnCgesvdj`|5.1.0| | | |6.1.0| |`cusolverDnCgesvdjBatched`|9.0| | | |`hipsolverDnCgesvdjBatched`|5.1.0| | | |6.1.0| |`cusolverDnCgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnCgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| @@ -210,6 +212,8 @@ |`cusolverDnDgeqrf_bufferSize`| | | | |`hipsolverDnDgeqrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnDgesvd`| | | | |`hipsolverDnDgesvd`|5.1.0| | | |6.1.0| |`cusolverDnDgesvd_bufferSize`| | | | |`hipsolverDnDgesvd_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnDgesvdaStridedBatched`|10.1| | | |`hipsolverDnDgesvdaStridedBatched`|5.4.0| | | |6.1.0| +|`cusolverDnDgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnDgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| |`cusolverDnDgesvdj`|9.0| | | |`hipsolverDnDgesvdj`|5.1.0| | | |6.1.0| |`cusolverDnDgesvdjBatched`|9.0| | | |`hipsolverDnDgesvdjBatched`|5.1.0| | | |6.1.0| |`cusolverDnDgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnDgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| @@ -308,6 +312,8 @@ |`cusolverDnSgeqrf_bufferSize`| | | | |`hipsolverDnSgeqrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnSgesvd`| | | | |`hipsolverDnSgesvd`|5.1.0| | | |6.1.0| |`cusolverDnSgesvd_bufferSize`| | | | |`hipsolverDnSgesvd_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnSgesvdaStridedBatched`|10.1| | | |`hipsolverDnSgesvdaStridedBatched`|5.4.0| | | |6.1.0| +|`cusolverDnSgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnSgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| |`cusolverDnSgesvdj`|9.0| | | |`hipsolverDnSgesvdj`|5.1.0| | | |6.1.0| |`cusolverDnSgesvdjBatched`|9.0| | | |`hipsolverDnSgesvdjBatched`|5.1.0| | | |6.1.0| |`cusolverDnSgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnSgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| @@ -398,6 +404,8 @@ |`cusolverDnZgeqrf_bufferSize`| | | | |`hipsolverDnZgeqrf_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnZgesvd`| | | | |`hipsolverDnZgesvd`|5.1.0| | | |6.1.0| |`cusolverDnZgesvd_bufferSize`| | | | |`hipsolverDnZgesvd_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnZgesvdaStridedBatched`|10.1| | | |`hipsolverDnZgesvdaStridedBatched`|5.4.0| | | |6.1.0| +|`cusolverDnZgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnZgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| |`cusolverDnZgesvdj`|9.0| | | |`hipsolverDnZgesvdj`|5.1.0| | | |6.1.0| |`cusolverDnZgesvdjBatched`|9.0| | | |`hipsolverDnZgesvdjBatched`|5.1.0| | | |6.1.0| |`cusolverDnZgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnZgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md index 7454bbbb..88de47e0 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md @@ -130,6 +130,8 @@ |`cusolverDnCgeqrf_bufferSize`| | | | |`hipsolverDnCgeqrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnCgesvd`| | | | |`hipsolverDnCgesvd`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnCgesvd_bufferSize`| | | | |`hipsolverDnCgesvd_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnCgesvdaStridedBatched`|10.1| | | |`hipsolverDnCgesvdaStridedBatched`|5.4.0| | | |6.1.0| | | | | | | +|`cusolverDnCgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnCgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| | | | | | | |`cusolverDnCgesvdj`|9.0| | | |`hipsolverDnCgesvdj`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnCgesvdjBatched`|9.0| | | |`hipsolverDnCgesvdjBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnCgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnCgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| | | | | | | @@ -210,6 +212,8 @@ |`cusolverDnDgeqrf_bufferSize`| | | | |`hipsolverDnDgeqrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgesvd`| | | | |`hipsolverDnDgesvd`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgesvd_bufferSize`| | | | |`hipsolverDnDgesvd_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnDgesvdaStridedBatched`|10.1| | | |`hipsolverDnDgesvdaStridedBatched`|5.4.0| | | |6.1.0| | | | | | | +|`cusolverDnDgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnDgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| | | | | | | |`cusolverDnDgesvdj`|9.0| | | |`hipsolverDnDgesvdj`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgesvdjBatched`|9.0| | | |`hipsolverDnDgesvdjBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnDgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| | | | | | | @@ -308,6 +312,8 @@ |`cusolverDnSgeqrf_bufferSize`| | | | |`hipsolverDnSgeqrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgesvd`| | | | |`hipsolverDnSgesvd`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgesvd_bufferSize`| | | | |`hipsolverDnSgesvd_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnSgesvdaStridedBatched`|10.1| | | |`hipsolverDnSgesvdaStridedBatched`|5.4.0| | | |6.1.0| | | | | | | +|`cusolverDnSgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnSgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| | | | | | | |`cusolverDnSgesvdj`|9.0| | | |`hipsolverDnSgesvdj`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgesvdjBatched`|9.0| | | |`hipsolverDnSgesvdjBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnSgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| | | | | | | @@ -398,6 +404,8 @@ |`cusolverDnZgeqrf_bufferSize`| | | | |`hipsolverDnZgeqrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZgesvd`| | | | |`hipsolverDnZgesvd`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZgesvd_bufferSize`| | | | |`hipsolverDnZgesvd_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnZgesvdaStridedBatched`|10.1| | | |`hipsolverDnZgesvdaStridedBatched`|5.4.0| | | |6.1.0| | | | | | | +|`cusolverDnZgesvdaStridedBatched_bufferSize`|10.1| | | |`hipsolverDnZgesvdaStridedBatched_bufferSize`|5.4.0| | | |6.1.0| | | | | | | |`cusolverDnZgesvdj`|9.0| | | |`hipsolverDnZgesvdj`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZgesvdjBatched`|9.0| | | |`hipsolverDnZgesvdjBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZgesvdjBatched_bufferSize`|9.0| | | |`hipsolverDnZgesvdjBatched_bufferSize`|5.1.0| | | |6.1.0| | | | | | | diff --git a/docs/tables/CUSOLVER_API_supported_by_ROC.md b/docs/tables/CUSOLVER_API_supported_by_ROC.md index 299ba724..02b4a557 100644 --- a/docs/tables/CUSOLVER_API_supported_by_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_ROC.md @@ -130,6 +130,8 @@ |`cusolverDnCgeqrf_bufferSize`| | | | | | | | | | | |`cusolverDnCgesvd`| | | | | | | | | | | |`cusolverDnCgesvd_bufferSize`| | | | | | | | | | | +|`cusolverDnCgesvdaStridedBatched`|10.1| | | | | | | | | | +|`cusolverDnCgesvdaStridedBatched_bufferSize`|10.1| | | | | | | | | | |`cusolverDnCgesvdj`|9.0| | | | | | | | | | |`cusolverDnCgesvdjBatched`|9.0| | | | | | | | | | |`cusolverDnCgesvdjBatched_bufferSize`|9.0| | | | | | | | | | @@ -210,6 +212,8 @@ |`cusolverDnDgeqrf_bufferSize`| | | | | | | | | | | |`cusolverDnDgesvd`| | | | | | | | | | | |`cusolverDnDgesvd_bufferSize`| | | | | | | | | | | +|`cusolverDnDgesvdaStridedBatched`|10.1| | | | | | | | | | +|`cusolverDnDgesvdaStridedBatched_bufferSize`|10.1| | | | | | | | | | |`cusolverDnDgesvdj`|9.0| | | | | | | | | | |`cusolverDnDgesvdjBatched`|9.0| | | | | | | | | | |`cusolverDnDgesvdjBatched_bufferSize`|9.0| | | | | | | | | | @@ -308,6 +312,8 @@ |`cusolverDnSgeqrf_bufferSize`| | | | | | | | | | | |`cusolverDnSgesvd`| | | | | | | | | | | |`cusolverDnSgesvd_bufferSize`| | | | | | | | | | | +|`cusolverDnSgesvdaStridedBatched`|10.1| | | | | | | | | | +|`cusolverDnSgesvdaStridedBatched_bufferSize`|10.1| | | | | | | | | | |`cusolverDnSgesvdj`|9.0| | | | | | | | | | |`cusolverDnSgesvdjBatched`|9.0| | | | | | | | | | |`cusolverDnSgesvdjBatched_bufferSize`|9.0| | | | | | | | | | @@ -398,6 +404,8 @@ |`cusolverDnZgeqrf_bufferSize`| | | | | | | | | | | |`cusolverDnZgesvd`| | | | | | | | | | | |`cusolverDnZgesvd_bufferSize`| | | | | | | | | | | +|`cusolverDnZgesvdaStridedBatched`|10.1| | | | | | | | | | +|`cusolverDnZgesvdaStridedBatched_bufferSize`|10.1| | | | | | | | | | |`cusolverDnZgesvdj`|9.0| | | | | | | | | | |`cusolverDnZgesvdjBatched`|9.0| | | | | | | | | | |`cusolverDnZgesvdjBatched_bufferSize`|9.0| | | | | | | | | | diff --git a/src/CUDA2HIP_SOLVER_API_functions.cpp b/src/CUDA2HIP_SOLVER_API_functions.cpp index 6762ace2..af158b4d 100644 --- a/src/CUDA2HIP_SOLVER_API_functions.cpp +++ b/src/CUDA2HIP_SOLVER_API_functions.cpp @@ -427,6 +427,16 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnDgesvdj", {"hipsolverDnDgesvdj", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnCgesvdj", {"hipsolverDnCgesvdj", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnZgesvdj", {"hipsolverDnZgesvdj", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + // NOTE: rocsolver_(s|d|c|z)gesvdx_strided_batched have a harness of other ROC and HIP API calls + {"cusolverDnSgesvdaStridedBatched_bufferSize", {"hipsolverDnSgesvdaStridedBatched_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnDgesvdaStridedBatched_bufferSize", {"hipsolverDnDgesvdaStridedBatched_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnCgesvdaStridedBatched_bufferSize", {"hipsolverDnCgesvdaStridedBatched_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnZgesvdaStridedBatched_bufferSize", {"hipsolverDnZgesvdaStridedBatched_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + // NOTE: rocsolver_(s|d|c|z)gesvdx_strided_batched have a harness of other ROC and HIP API calls + {"cusolverDnSgesvdaStridedBatched", {"hipsolverDnSgesvdaStridedBatched", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnDgesvdaStridedBatched", {"hipsolverDnDgesvdaStridedBatched", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnCgesvdaStridedBatched", {"hipsolverDnCgesvdaStridedBatched", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnZgesvdaStridedBatched", {"hipsolverDnZgesvdaStridedBatched", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, }; const std::map CUDA_SOLVER_FUNCTION_VER_MAP { @@ -697,6 +707,14 @@ const std::map CUDA_SOLVER_FUNCTION_VER_MAP { {"cusolverDnDgesvdj", {CUDA_90, CUDA_0, CUDA_0}}, {"cusolverDnCgesvdj", {CUDA_90, CUDA_0, CUDA_0}}, {"cusolverDnZgesvdj", {CUDA_90, CUDA_0, CUDA_0}}, + {"cusolverDnSgesvdaStridedBatched_bufferSize", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnDgesvdaStridedBatched_bufferSize", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnCgesvdaStridedBatched_bufferSize", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnZgesvdaStridedBatched_bufferSize", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnSgesvdaStridedBatched", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnDgesvdaStridedBatched", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnCgesvdaStridedBatched", {CUDA_101, CUDA_0, CUDA_0}}, + {"cusolverDnZgesvdaStridedBatched", {CUDA_101, CUDA_0, CUDA_0}}, }; const std::map HIP_SOLVER_FUNCTION_VER_MAP { @@ -926,6 +944,14 @@ const std::map HIP_SOLVER_FUNCTION_VER_MAP { {"hipsolverDnDgesvdj", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnCgesvdj", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnZgesvdj", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnSgesvdaStridedBatched_bufferSize", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnDgesvdaStridedBatched_bufferSize", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnCgesvdaStridedBatched_bufferSize", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnZgesvdaStridedBatched_bufferSize", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnSgesvdaStridedBatched", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnDgesvdaStridedBatched", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnCgesvdaStridedBatched", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnZgesvdaStridedBatched", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, {"rocsolver_spotrf", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, {"rocsolver_dpotrf", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, diff --git a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu index 95e50b93..2c2eb43b 100644 --- a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu +++ b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu @@ -15,6 +15,7 @@ int main() { int il = 0; int iu = 0; int imeig = 0; + int irank = 0; int nrhs = 0; int lda = 0; int ldb = 0; @@ -26,6 +27,7 @@ int main() { int devIpiv = 0; int devInfo = 0; int info = 0; + int id_info = 0; int infoArray = 0; int batchSize = 0; int imax_sweeps = 0; @@ -33,15 +35,19 @@ int main() { int iexecuted_sweeps = 0; int iecon = 0; float fA = 0.f; + float fd_A = 0.f; float fB = 0.f; float fC = 0.f; float fD = 0.f; float fE = 0.f; float fS = 0.f; + float fd_S = 0.f; float fU = 0.f; + float fd_U = 0.f; float fvl = 0.f; float fvu = 0.f; float fV = 0.f; + float fd_V = 0.f; float fVT = 0.f; float fX = 0.f; float fW = 0.f; @@ -49,15 +55,19 @@ int main() { float fTAUQ = 0.f; float fTAUP = 0.f; double dA = 0.f; + double dd_A = 0.f; double dB = 0.f; double dC = 0.f; double dD = 0.f; double dE = 0.f; double dS = 0.f; + double dd_S = 0.f; double dU = 0.f; + double dd_U = 0.f; double dvl = 0.f; double dvu = 0.f; double dV = 0.f; + double dd_V = 0.f; double dVT = 0.f; double dX = 0.f; double dW = 0.f; @@ -67,25 +77,33 @@ int main() { double dtolerance = 0.f; double dresidual = 0.f; float fWorkspace = 0.f; + float fd_Workspace = 0.f; float frWork = 0.f; double dWorkspace = 0.f; + double dd_Workspace = 0.f; double drWork = 0.f; + double dh_R_nrmF = 0.f; void *Workspace = nullptr; size_t lwork_bytes = 0; signed char jobu = 0; signed char jobvt = 0; + long long int strideA = 0; + long long int strideS = 0; + long long int strideU = 0; + long long int strideV = 0; + float** fAarray = 0; float** fBarray = 0; double** dAarray = 0; double** dBarray = 0; - // CHECK: hipDoubleComplex dComplexA, dComplexB, dComplexC, dComplexD, dComplexE, dComplexS, dComplexU, dComplexV, dComplexVT, dComplexX, dComplexWorkspace, dComplexrWork, dComplexTAU, dComplexTAUQ, dComplexTAUP; - cuDoubleComplex dComplexA, dComplexB, dComplexC, dComplexD, dComplexE, dComplexS, dComplexU, dComplexV, dComplexVT, dComplexX, dComplexWorkspace, dComplexrWork, dComplexTAU, dComplexTAUQ, dComplexTAUP; + // CHECK: hipDoubleComplex dComplexA, dComplexd_A, dComplexB, dComplexC, dComplexD, dComplexE, dComplexS, dComplexU, dComplexd_U, dComplexV, dComplexd_V, dComplexVT, dComplexX, dComplexWorkspace, dComplexd_Workspace, dComplexrWork, dComplexTAU, dComplexTAUQ, dComplexTAUP; + cuDoubleComplex dComplexA, dComplexd_A, dComplexB, dComplexC, dComplexD, dComplexE, dComplexS, dComplexU, dComplexd_U, dComplexV, dComplexd_V, dComplexVT, dComplexX, dComplexWorkspace, dComplexd_Workspace, dComplexrWork, dComplexTAU, dComplexTAUQ, dComplexTAUP; - // CHECK: hipComplex complexA, complexB, complexC, complexD, complexE, complexS, complexU, complexV, complexVT, complexX, complexWorkspace, complexrWork, complexTAU, complexTAUQ, complexTAUP; - cuComplex complexA, complexB, complexC, complexD, complexE, complexS, complexU, complexV, complexVT, complexX, complexWorkspace, complexrWork, complexTAU, complexTAUQ, complexTAUP; + // CHECK: hipComplex complexA, complexd_A, complexB, complexC, complexD, complexE, complexS, complexU, complexd_U, complexV, complexd_V, complexVT, complexX, complexWorkspace, complexd_Workspace, complexrWork, complexTAU, complexTAUQ, complexTAUP; + cuComplex complexA, complexd_A, complexB, complexC, complexD, complexE, complexS, complexU, complexd_U, complexV, complexd_V, complexVT, complexX, complexWorkspace, complexd_Workspace, complexrWork, complexTAU, complexTAUQ, complexTAUP; // CHECK: hipDoubleComplex** dcomplexAarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; @@ -1222,6 +1240,46 @@ int main() { // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZhegvdx(hipsolverHandle_t handle, hipsolverEigType_t itype, hipsolverEigMode_t jobz, hipsolverEigRange_t range, hipblasFillMode_t uplo, int n, hipDoubleComplex* A, int lda, hipDoubleComplex* B, int ldb, double vl, double vu, int il, int iu, int* nev, double* W, hipDoubleComplex* work, int lwork, int* devInfo); // CHECK: status = hipsolverDnZhegvdx(handle, eigType, jobz, eigRange, fillMode, n, &dComplexA, lda, &dComplexB, ldb, dvl, dvu, il, iu, &imeig, &dW, &dComplexWorkspace, Lwork, &info); status = cusolverDnZhegvdx(handle, eigType, jobz, eigRange, fillMode, n, &dComplexA, lda, &dComplexB, ldb, dvl, dvu, il, iu, &imeig, &dW, &dComplexWorkspace, Lwork, &info); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched_bufferSize(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const float * d_A, int lda, long long int strideA, const float * d_S, long long int strideS, const float * d_U, int ldu, long long int strideU, const float * d_V, int ldv, long long int strideV, int * lwork, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnSgesvdaStridedBatched_bufferSize(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const float* A, int lda, long long int strideA, const float* S, long long int strideS, const float* U, int ldu, long long int strideU, const float* V, int ldv, long long int strideV, int* lwork, int batch_count); + // CHECK: status = hipsolverDnSgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &fd_A, lda, strideA, &fd_S, strideS, &fd_U, ldu, strideU, &fd_V, ldv, strideV, &Lwork, batchSize); + status = cusolverDnSgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &fd_A, lda, strideA, &fd_S, strideS, &fd_U, ldu, strideU, &fd_V, ldv, strideV, &Lwork, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched_bufferSize(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const double * d_A, int lda, long long int strideA, const double * d_S, long long int strideS, const double * d_U, int ldu, long long int strideU, const double * d_V, int ldv, long long int strideV, int * lwork, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnDgesvdaStridedBatched_bufferSize(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const double* A, int lda, long long int strideA, const double* S, long long int strideS, const double* U, int ldu, long long int strideU, const double* V, int ldv, long long int strideV, int* lwork, int batch_count); + // CHECK: status = hipsolverDnDgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &dd_A, lda, strideA, &dd_S, strideS, &dd_U, ldu, strideU, &dd_V, ldv, strideV, &Lwork, batchSize); + status = cusolverDnDgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &dd_A, lda, strideA, &dd_S, strideS, &dd_U, ldu, strideU, &dd_V, ldv, strideV, &Lwork, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched_bufferSize(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const cuComplex * d_A, int lda, long long int strideA, const float * d_S, long long int strideS, const cuComplex * d_U, int ldu, long long int strideU, const cuComplex * d_V, int ldv, long long int strideV, int * lwork, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnCgesvdaStridedBatched_bufferSize(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const hipFloatComplex* A, int lda, long long int strideA, const float* S, long long int strideS, const hipFloatComplex* U, int ldu, long long int strideU, const hipFloatComplex* V, int ldv, long long int strideV, int* lwork, int batch_count); + // CHECK: status = hipsolverDnCgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &complexd_A, lda, strideA, &fd_S, strideS, &complexd_U, ldu, strideU, &complexd_V, ldv, strideV, &Lwork, batchSize); + status = cusolverDnCgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &complexd_A, lda, strideA, &fd_S, strideS, &complexd_U, ldu, strideU, &complexd_V, ldv, strideV, &Lwork, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched_bufferSize(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const cuDoubleComplex *d_A, int lda, long long int strideA, const double * d_S, long long int strideS, const cuDoubleComplex *d_U, int ldu, long long int strideU, const cuDoubleComplex *d_V, int ldv, long long int strideV, int * lwork, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZgesvdaStridedBatched_bufferSize(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const hipDoubleComplex* A, int lda, long long int strideA, const double* S, long long int strideS, const hipDoubleComplex* U, int ldu, long long int strideU, const hipDoubleComplex* V, int ldv, long long int strideV, int* lwork, int batch_count); + // CHECK: status = hipsolverDnZgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &dComplexd_A, lda, strideA, &dd_S, strideS, &dComplexd_U, ldu, strideU, &dComplexd_V, ldv, strideV, &Lwork, batchSize); + status = cusolverDnZgesvdaStridedBatched_bufferSize(handle, jobz, irank, m, n, &dComplexd_A, lda, strideA, &dd_S, strideS, &dComplexd_U, ldu, strideU, &dComplexd_V, ldv, strideV, &Lwork, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSgesvdaStridedBatched(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const float * d_A, int lda, long long int strideA, float * d_S, long long int strideS, float * d_U, int ldu, long long int strideU, float * d_V, int ldv, long long int strideV, float * d_work, int lwork, int * d_info, double * h_R_nrmF, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnSgesvdaStridedBatched(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const float* A, int lda, long long int strideA, float* S, long long int strideS, float* U, int ldu, long long int strideU, float* V, int ldv, long long int strideV, float* work, int lwork, int* devInfo, double* hRnrmF, int batch_count); + // CHECK: status = hipsolverDnSgesvdaStridedBatched(handle, jobz, irank, m, n, &fd_A, lda, strideA, &fd_S, strideS, &fd_U, ldu, strideU, &fd_V, ldv, strideV, &fd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + status = cusolverDnSgesvdaStridedBatched(handle, jobz, irank, m, n, &fd_A, lda, strideA, &fd_S, strideS, &fd_U, ldu, strideU, &fd_V, ldv, strideV, &fd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnDgesvdaStridedBatched(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const double * d_A, int lda, long long int strideA, double * d_S, long long int strideS, double * d_U, int ldu, long long int strideU, double * d_V, int ldv, long long int strideV, double * d_work, int lwork, int * d_info, double * h_R_nrmF, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnDgesvdaStridedBatched(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const double* A, int lda, long long int strideA, double* S, long long int strideS, double* U, int ldu, long long int strideU, double* V, int ldv, long long int strideV, double* work, int lwork, int* devInfo, double* hRnrmF, int batch_count); + // CHECK: status = hipsolverDnDgesvdaStridedBatched(handle, jobz, irank, m, n, &dd_A, lda, strideA, &dd_S, strideS, &dd_U, ldu, strideU, &dd_V, ldv, strideV, &dd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + status = cusolverDnDgesvdaStridedBatched(handle, jobz, irank, m, n, &dd_A, lda, strideA, &dd_S, strideS, &dd_U, ldu, strideU, &dd_V, ldv, strideV, &dd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnCgesvdaStridedBatched(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const cuComplex * d_A, int lda, long long int strideA, float * d_S, long long int strideS, cuComplex * d_U, int ldu, long long int strideU, cuComplex * d_V, int ldv, long long int strideV, cuComplex * d_work, int lwork, int * d_info, double * h_R_nrmF, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnCgesvdaStridedBatched(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const hipFloatComplex* A, int lda, long long int strideA, float* S, long long int strideS, hipFloatComplex* U, int ldu, long long int strideU, hipFloatComplex* V, int ldv, long long int strideV, hipFloatComplex* work, int lwork, int* devInfo, double* hRnrmF, int batch_count); + // CHECK: status = hipsolverDnCgesvdaStridedBatched(handle, jobz, irank, m, n, &complexd_A, lda, strideA, &fd_S, strideS, &complexd_U, ldu, strideU, &complexd_V, ldv, strideV, &complexd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + status = cusolverDnCgesvdaStridedBatched(handle, jobz, irank, m, n, &complexd_A, lda, strideA, &fd_S, strideS, &complexd_U, ldu, strideU, &complexd_V, ldv, strideV, &complexd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnZgesvdaStridedBatched(cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, const cuDoubleComplex *d_A, int lda, long long int strideA, double * d_S, long long int strideS, cuDoubleComplex * d_U, int ldu, long long int strideU, cuDoubleComplex * d_V, int ldv, long long int strideV, cuDoubleComplex * d_work, int lwork, int * d_info, double * h_R_nrmF, int batchSize); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZgesvdaStridedBatched(hipsolverHandle_t handle, hipsolverEigMode_t jobz, int rank, int m, int n, const hipDoubleComplex* A, int lda, long long int strideA, double* S, long long int strideS, hipDoubleComplex* U, int ldu, long long int strideU, hipDoubleComplex* V, int ldv, long long int strideV, hipDoubleComplex* work, int lwork, int* devInfo, double* hRnrmF, int batch_count); + // CHECK: status = hipsolverDnZgesvdaStridedBatched(handle, jobz, irank, m, n, &dComplexd_A, lda, strideA, &dd_S, strideS, &dComplexd_U, ldu, strideU, &dComplexd_V, ldv, strideV, &dComplexd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); + status = cusolverDnZgesvdaStridedBatched(handle, jobz, irank, m, n, &dComplexd_A, lda, strideA, &dd_S, strideS, &dComplexd_U, ldu, strideU, &dComplexd_V, ldv, strideV, &dComplexd_Workspace, Lwork, &id_info, &dh_R_nrmF, batchSize); #endif #if CUDA_VERSION >= 10020