From 6fc1fdaa805e475deb1a1cb8ff6b560ca2a73918 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 5 Feb 2024 13:55:47 +0000 Subject: [PATCH] [HIPIFY][BLAS][6.1][sync] Sync with `hipBLAS` and `rocBLAS` - Step 12 - SWAP 64bit + Updated `BLAS` synthetic tests, the regenerated hipify-perl, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 80 +++++++++---------- docs/tables/CUBLAS_API_supported_by_HIP.md | 16 ++-- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 16 ++-- docs/tables/CUBLAS_API_supported_by_ROC.md | 16 ++-- src/CUDA2HIP_BLAS_API_functions.cpp | 24 ++++-- .../synthetic/libraries/cublas2hipblas_v2.cu | 28 +++++++ .../synthetic/libraries/cublas2rocblas_v2.cu | 28 +++++++ 7 files changed, 136 insertions(+), 72 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index fc8ef7c3..57e52189 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1297,15 +1297,21 @@ sub rocSubstitutions { subst("cublasCrotg", "rocblas_crotg", "library"); subst("cublasCrotg_v2", "rocblas_crotg", "library"); subst("cublasCscal", "rocblas_cscal", "library"); + subst("cublasCscal_64", "rocblas_cscal_64", "library"); subst("cublasCscal_v2", "rocblas_cscal", "library"); + subst("cublasCscal_v2_64", "rocblas_cscal_64", "library"); subst("cublasCsrot", "rocblas_csrot", "library"); subst("cublasCsrot_64", "rocblas_csrot_64", "library"); subst("cublasCsrot_v2", "rocblas_csrot", "library"); subst("cublasCsrot_v2_64", "rocblas_csrot_64", "library"); subst("cublasCsscal", "rocblas_csscal", "library"); + subst("cublasCsscal_64", "rocblas_csscal_64", "library"); subst("cublasCsscal_v2", "rocblas_csscal", "library"); + subst("cublasCsscal_v2_64", "rocblas_csscal_64", "library"); subst("cublasCswap", "rocblas_cswap", "library"); + subst("cublasCswap_64", "rocblas_cswap_64", "library"); subst("cublasCswap_v2", "rocblas_cswap", "library"); + subst("cublasCswap_v2_64", "rocblas_cswap_64", "library"); subst("cublasCsymm", "rocblas_csymm", "library"); subst("cublasCsymm_v2", "rocblas_csymm", "library"); subst("cublasCsymv", "rocblas_csymv", "library"); @@ -1387,7 +1393,9 @@ sub rocSubstitutions { subst("cublasDsbmv", "rocblas_dsbmv", "library"); subst("cublasDsbmv_v2", "rocblas_dsbmv", "library"); subst("cublasDscal", "rocblas_dscal", "library"); + subst("cublasDscal_64", "rocblas_dscal_64", "library"); subst("cublasDscal_v2", "rocblas_dscal", "library"); + subst("cublasDscal_v2_64", "rocblas_dscal_64", "library"); subst("cublasDspmv", "rocblas_dspmv", "library"); subst("cublasDspmv_v2", "rocblas_dspmv", "library"); subst("cublasDspr", "rocblas_dspr", "library"); @@ -1395,7 +1403,9 @@ sub rocSubstitutions { subst("cublasDspr2_v2", "rocblas_dspr2", "library"); subst("cublasDspr_v2", "rocblas_dspr", "library"); subst("cublasDswap", "rocblas_dswap", "library"); + subst("cublasDswap_64", "rocblas_dswap_64", "library"); subst("cublasDswap_v2", "rocblas_dswap", "library"); + subst("cublasDswap_v2_64", "rocblas_dswap_64", "library"); subst("cublasDsymm", "rocblas_dsymm", "library"); subst("cublasDsymm_v2", "rocblas_dsymm", "library"); subst("cublasDsymv", "rocblas_dsymv", "library"); @@ -1556,7 +1566,9 @@ sub rocSubstitutions { subst("cublasSsbmv", "rocblas_ssbmv", "library"); subst("cublasSsbmv_v2", "rocblas_ssbmv", "library"); subst("cublasSscal", "rocblas_sscal", "library"); + subst("cublasSscal_64", "rocblas_sscal_64", "library"); subst("cublasSscal_v2", "rocblas_sscal", "library"); + subst("cublasSscal_v2_64", "rocblas_sscal_64", "library"); subst("cublasSspmv", "rocblas_sspmv", "library"); subst("cublasSspmv_v2", "rocblas_sspmv", "library"); subst("cublasSspr", "rocblas_sspr", "library"); @@ -1564,7 +1576,9 @@ sub rocSubstitutions { subst("cublasSspr2_v2", "rocblas_sspr2", "library"); subst("cublasSspr_v2", "rocblas_sspr", "library"); subst("cublasSswap", "rocblas_sswap", "library"); + subst("cublasSswap_64", "rocblas_sswap_64", "library"); subst("cublasSswap_v2", "rocblas_sswap", "library"); + subst("cublasSswap_v2_64", "rocblas_sswap_64", "library"); subst("cublasSsymm", "rocblas_ssymm", "library"); subst("cublasSsymm_v2", "rocblas_ssymm", "library"); subst("cublasSsymv", "rocblas_ssymv", "library"); @@ -1621,7 +1635,9 @@ sub rocSubstitutions { subst("cublasZdrot_v2", "rocblas_zdrot", "library"); subst("cublasZdrot_v2_64", "rocblas_zdrot_64", "library"); subst("cublasZdscal", "rocblas_zdscal", "library"); + subst("cublasZdscal_64", "rocblas_zdscal_64", "library"); subst("cublasZdscal_v2", "rocblas_zdscal", "library"); + subst("cublasZdscal_v2_64", "rocblas_zdscal_64", "library"); subst("cublasZgbmv", "rocblas_zgbmv", "library"); subst("cublasZgbmv_v2", "rocblas_zgbmv", "library"); subst("cublasZgeam", "rocblas_zgeam", "library"); @@ -1665,9 +1681,13 @@ sub rocSubstitutions { subst("cublasZrotg", "rocblas_zrotg", "library"); subst("cublasZrotg_v2", "rocblas_zrotg", "library"); subst("cublasZscal", "rocblas_zscal", "library"); + subst("cublasZscal_64", "rocblas_zscal_64", "library"); subst("cublasZscal_v2", "rocblas_zscal", "library"); + subst("cublasZscal_v2_64", "rocblas_zscal_64", "library"); subst("cublasZswap", "rocblas_zswap", "library"); + subst("cublasZswap_64", "rocblas_zswap_64", "library"); subst("cublasZswap_v2", "rocblas_zswap", "library"); + subst("cublasZswap_v2_64", "rocblas_zswap_64", "library"); subst("cublasZsymm", "rocblas_zsymm", "library"); subst("cublasZsymm_v2", "rocblas_zsymm", "library"); subst("cublasZsymv", "rocblas_zsymv", "library"); @@ -3296,15 +3316,21 @@ sub simpleSubstitutions { subst("cublasCrotg", "hipblasCrotg_v2", "library"); subst("cublasCrotg_v2", "hipblasCrotg_v2", "library"); subst("cublasCscal", "hipblasCscal_v2", "library"); + subst("cublasCscal_64", "hipblasCscal_v2_64", "library"); subst("cublasCscal_v2", "hipblasCscal_v2", "library"); + subst("cublasCscal_v2_64", "hipblasCscal_v2_64", "library"); subst("cublasCsrot", "hipblasCsrot_v2", "library"); subst("cublasCsrot_64", "hipblasCsrot_v2_64", "library"); subst("cublasCsrot_v2", "hipblasCsrot_v2", "library"); subst("cublasCsrot_v2_64", "hipblasCsrot_v2_64", "library"); subst("cublasCsscal", "hipblasCsscal_v2", "library"); + subst("cublasCsscal_64", "hipblasCsscal_v2_64", "library"); subst("cublasCsscal_v2", "hipblasCsscal_v2", "library"); + subst("cublasCsscal_v2_64", "hipblasCsscal_v2_64", "library"); subst("cublasCswap", "hipblasCswap_v2", "library"); + subst("cublasCswap_64", "hipblasCswap_v2_64", "library"); subst("cublasCswap_v2", "hipblasCswap_v2", "library"); + subst("cublasCswap_v2_64", "hipblasCswap_v2_64", "library"); subst("cublasCsymm", "hipblasCsymm_v2", "library"); subst("cublasCsymm_v2", "hipblasCsymm_v2", "library"); subst("cublasCsymv", "hipblasCsymv_v2", "library"); @@ -3391,7 +3417,9 @@ sub simpleSubstitutions { subst("cublasDsbmv", "hipblasDsbmv", "library"); subst("cublasDsbmv_v2", "hipblasDsbmv", "library"); subst("cublasDscal", "hipblasDscal", "library"); + subst("cublasDscal_64", "hipblasDscal_64", "library"); subst("cublasDscal_v2", "hipblasDscal", "library"); + subst("cublasDscal_v2_64", "hipblasDscal_64", "library"); subst("cublasDspmv", "hipblasDspmv", "library"); subst("cublasDspmv_v2", "hipblasDspmv", "library"); subst("cublasDspr", "hipblasDspr", "library"); @@ -3399,7 +3427,9 @@ sub simpleSubstitutions { subst("cublasDspr2_v2", "hipblasDspr2", "library"); subst("cublasDspr_v2", "hipblasDspr", "library"); subst("cublasDswap", "hipblasDswap", "library"); + subst("cublasDswap_64", "hipblasDswap_64", "library"); subst("cublasDswap_v2", "hipblasDswap", "library"); + subst("cublasDswap_v2_64", "hipblasDswap_64", "library"); subst("cublasDsymm", "hipblasDsymm", "library"); subst("cublasDsymm_v2", "hipblasDsymm", "library"); subst("cublasDsymv", "hipblasDsymv", "library"); @@ -3559,7 +3589,9 @@ sub simpleSubstitutions { subst("cublasSsbmv", "hipblasSsbmv", "library"); subst("cublasSsbmv_v2", "hipblasSsbmv", "library"); subst("cublasSscal", "hipblasSscal", "library"); + subst("cublasSscal_64", "hipblasSscal_64", "library"); subst("cublasSscal_v2", "hipblasSscal", "library"); + subst("cublasSscal_v2_64", "hipblasSscal_64", "library"); subst("cublasSspmv", "hipblasSspmv", "library"); subst("cublasSspmv_v2", "hipblasSspmv", "library"); subst("cublasSspr", "hipblasSspr", "library"); @@ -3567,7 +3599,9 @@ sub simpleSubstitutions { subst("cublasSspr2_v2", "hipblasSspr2", "library"); subst("cublasSspr_v2", "hipblasSspr", "library"); subst("cublasSswap", "hipblasSswap", "library"); + subst("cublasSswap_64", "hipblasSswap_64", "library"); subst("cublasSswap_v2", "hipblasSswap", "library"); + subst("cublasSswap_v2_64", "hipblasSswap_64", "library"); subst("cublasSsymm", "hipblasSsymm", "library"); subst("cublasSsymm_v2", "hipblasSsymm", "library"); subst("cublasSsymv", "hipblasSsymv", "library"); @@ -3620,7 +3654,9 @@ sub simpleSubstitutions { subst("cublasZdrot_v2", "hipblasZdrot_v2", "library"); subst("cublasZdrot_v2_64", "hipblasZdrot_v2_64", "library"); subst("cublasZdscal", "hipblasZdscal_v2", "library"); + subst("cublasZdscal_64", "hipblasZdscal_v2_64", "library"); subst("cublasZdscal_v2", "hipblasZdscal_v2", "library"); + subst("cublasZdscal_v2_64", "hipblasZdscal_v2_64", "library"); subst("cublasZgbmv", "hipblasZgbmv_v2", "library"); subst("cublasZgbmv_v2", "hipblasZgbmv_v2", "library"); subst("cublasZgeam", "hipblasZgeam_v2", "library"); @@ -3669,9 +3705,13 @@ sub simpleSubstitutions { subst("cublasZrotg", "hipblasZrotg_v2", "library"); subst("cublasZrotg_v2", "hipblasZrotg_v2", "library"); subst("cublasZscal", "hipblasZscal_v2", "library"); + subst("cublasZscal_64", "hipblasZscal_v2_64", "library"); subst("cublasZscal_v2", "hipblasZscal_v2", "library"); + subst("cublasZscal_v2_64", "hipblasZscal_v2_64", "library"); subst("cublasZswap", "hipblasZswap_v2", "library"); + subst("cublasZswap_64", "hipblasZswap_v2_64", "library"); subst("cublasZswap_v2", "hipblasZswap_v2", "library"); + subst("cublasZswap_v2_64", "hipblasZswap_v2_64", "library"); subst("cublasZsymm", "hipblasZsymm_v2", "library"); subst("cublasZsymm_v2", "hipblasZsymm_v2", "library"); subst("cublasZsymv", "hipblasZsymv_v2", "library"); @@ -10561,10 +10601,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZsymv_64", "cublasZsymm_v2_64", "cublasZsymm_64", - "cublasZswap_v2_64", - "cublasZswap_64", - "cublasZscal_v2_64", - "cublasZscal_64", "cublasZmatinvBatched", "cublasZhpr_v2_64", "cublasZhpr_64", @@ -10604,8 +10640,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZgeam_64", "cublasZgbmv_v2_64", "cublasZgbmv_64", - "cublasZdscal_v2_64", - "cublasZdscal_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -10651,16 +10685,12 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsymv_64", "cublasSsymm_v2_64", "cublasSsymm_64", - "cublasSswap_v2_64", - "cublasSswap_64", "cublasSspr_v2_64", "cublasSspr_64", "cublasSspr2_v2_64", "cublasSspr2_64", "cublasSspmv_v2_64", "cublasSspmv_64", - "cublasSscal_v2_64", - "cublasSscal_64", "cublasSsbmv_v2_64", "cublasSsbmv_64", "cublasSmatinvBatched", @@ -10765,16 +10795,12 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsymv_64", "cublasDsymm_v2_64", "cublasDsymm_64", - "cublasDswap_v2_64", - "cublasDswap_64", "cublasDspr_v2_64", "cublasDspr_64", "cublasDspr2_v2_64", "cublasDspr2_64", "cublasDspmv_v2_64", "cublasDspmv_64", - "cublasDscal_v2_64", - "cublasDscal_64", "cublasDsbmv_v2_64", "cublasDsbmv_64", "cublasDotcEx_64", @@ -10832,12 +10858,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCsymv_64", "cublasCsymm_v2_64", "cublasCsymm_64", - "cublasCswap_v2_64", - "cublasCswap_64", - "cublasCsscal_v2_64", - "cublasCsscal_64", - "cublasCscal_v2_64", - "cublasCscal_64", "cublasCopyEx_64", "cublasCopyEx", "cublasContext", @@ -11001,10 +11021,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZsymv_64", "cublasZsymm_v2_64", "cublasZsymm_64", - "cublasZswap_v2_64", - "cublasZswap_64", - "cublasZscal_v2_64", - "cublasZscal_64", "cublasZmatinvBatched", "cublasZhpr_v2_64", "cublasZhpr_64", @@ -11049,8 +11065,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgeam_64", "cublasZgbmv_v2_64", "cublasZgbmv_64", - "cublasZdscal_v2_64", - "cublasZdscal_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -11092,16 +11106,12 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSsymv_64", "cublasSsymm_v2_64", "cublasSsymm_64", - "cublasSswap_v2_64", - "cublasSswap_64", "cublasSspr_v2_64", "cublasSspr_64", "cublasSspr2_v2_64", "cublasSspr2_64", "cublasSspmv_v2_64", "cublasSspmv_64", - "cublasSscal_v2_64", - "cublasSscal_64", "cublasSsbmv_v2_64", "cublasSsbmv_64", "cublasSmatinvBatched", @@ -11205,16 +11215,12 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDsymv_64", "cublasDsymm_v2_64", "cublasDsymm_64", - "cublasDswap_v2_64", - "cublasDswap_64", "cublasDspr_v2_64", "cublasDspr_64", "cublasDspr2_v2_64", "cublasDspr2_64", "cublasDspmv_v2_64", "cublasDspmv_64", - "cublasDscal_v2_64", - "cublasDscal_64", "cublasDsbmv_v2_64", "cublasDsbmv_64", "cublasDotcEx_64", @@ -11277,12 +11283,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsymv_64", "cublasCsymm_v2_64", "cublasCsymm_64", - "cublasCswap_v2_64", - "cublasCswap_64", - "cublasCsscal_v2_64", - "cublasCsscal_64", - "cublasCscal_v2_64", - "cublasCscal_64", "cublasCopyEx_64", "cublasCopyEx", "cublasCmatinvBatched", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 4e28e386..7e552949 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -238,9 +238,9 @@ |`cublasCsscal_v2`| | | | |`hipblasCsscal_v2`|6.0.0| | | | | |`cublasCsscal_v2_64`|12.0| | | |`hipblasCsscal_v2_64`|6.1.0| | | | | |`cublasCswap`| | | | |`hipblasCswap_v2`|6.0.0| | | | | -|`cublasCswap_64`|12.0| | | | | | | | | | +|`cublasCswap_64`|12.0| | | |`hipblasCswap_v2_64`|6.1.0| | | | | |`cublasCswap_v2`| | | | |`hipblasCswap_v2`|6.0.0| | | | | -|`cublasCswap_v2_64`|12.0| | | | | | | | | | +|`cublasCswap_v2_64`|12.0| | | |`hipblasCswap_v2_64`|6.1.0| | | | | |`cublasDasum`| | | | |`hipblasDasum`|1.8.2| | | | | |`cublasDasum_64`|12.0| | | |`hipblasDasum_64`|6.1.0| | | | | |`cublasDasum_v2`| | | | |`hipblasDasum`|1.8.2| | | | | @@ -278,9 +278,9 @@ |`cublasDscal_v2`| | | | |`hipblasDscal`|1.8.2| | | | | |`cublasDscal_v2_64`|12.0| | | |`hipblasDscal_64`|6.1.0| | | | | |`cublasDswap`| | | | |`hipblasDswap`|3.0.0| | | | | -|`cublasDswap_64`|12.0| | | | | | | | | | +|`cublasDswap_64`|12.0| | | |`hipblasDswap_64`|6.1.0| | | | | |`cublasDswap_v2`| | | | |`hipblasDswap`|3.0.0| | | | | -|`cublasDswap_v2_64`|12.0| | | | | | | | | | +|`cublasDswap_v2_64`|12.0| | | |`hipblasDswap_64`|6.1.0| | | | | |`cublasDzasum`| | | | |`hipblasDzasum_v2`|6.0.0| | | | | |`cublasDzasum_64`|12.0| | | |`hipblasDzasum_v2_64`|6.1.0| | | | | |`cublasDzasum_v2`| | | | |`hipblasDzasum_v2`|6.0.0| | | | | @@ -368,9 +368,9 @@ |`cublasSscal_v2`| | | | |`hipblasSscal`|1.8.2| | | | | |`cublasSscal_v2_64`|12.0| | | |`hipblasSscal_64`|6.1.0| | | | | |`cublasSswap`| | | | |`hipblasSswap`|3.0.0| | | | | -|`cublasSswap_64`|12.0| | | | | | | | | | +|`cublasSswap_64`|12.0| | | |`hipblasSswap_64`|6.1.0| | | | | |`cublasSswap_v2`| | | | |`hipblasSswap`|3.0.0| | | | | -|`cublasSswap_v2_64`|12.0| | | | | | | | | | +|`cublasSswap_v2_64`|12.0| | | |`hipblasSswap_64`|6.1.0| | | | | |`cublasZaxpy`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | | |`cublasZaxpy_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | | @@ -406,9 +406,9 @@ |`cublasZscal_v2`| | | | |`hipblasZscal_v2`|6.0.0| | | | | |`cublasZscal_v2_64`|12.0| | | |`hipblasZscal_v2_64`|6.1.0| | | | | |`cublasZswap`| | | | |`hipblasZswap_v2`|6.0.0| | | | | -|`cublasZswap_64`|12.0| | | | | | | | | | +|`cublasZswap_64`|12.0| | | |`hipblasZswap_v2_64`|6.1.0| | | | | |`cublasZswap_v2`| | | | |`hipblasZswap_v2`|6.0.0| | | | | -|`cublasZswap_v2_64`|12.0| | | | | | | | | | +|`cublasZswap_v2_64`|12.0| | | |`hipblasZswap_v2_64`|6.1.0| | | | | ## **6. CUBLAS Level-2 Function Reference** diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 0b61b62a..d2020833 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -238,9 +238,9 @@ |`cublasCsscal_v2`| | | | |`hipblasCsscal_v2`|6.0.0| | | | |`rocblas_csscal`|3.5.0| | | | | |`cublasCsscal_v2_64`|12.0| | | |`hipblasCsscal_v2_64`|6.1.0| | | | |`rocblas_csscal_64`|6.1.0| | | | | |`cublasCswap`| | | | |`hipblasCswap_v2`|6.0.0| | | | |`rocblas_cswap`|1.5.0| | | | | -|`cublasCswap_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCswap_64`|12.0| | | |`hipblasCswap_v2_64`|6.1.0| | | | |`rocblas_cswap_64`|6.1.0| | | | | |`cublasCswap_v2`| | | | |`hipblasCswap_v2`|6.0.0| | | | |`rocblas_cswap`|1.5.0| | | | | -|`cublasCswap_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCswap_v2_64`|12.0| | | |`hipblasCswap_v2_64`|6.1.0| | | | |`rocblas_cswap_64`|6.1.0| | | | | |`cublasDasum`| | | | |`hipblasDasum`|1.8.2| | | | |`rocblas_dasum`|1.5.0| | | | | |`cublasDasum_64`|12.0| | | |`hipblasDasum_64`|6.1.0| | | | |`rocblas_dasum_64`|6.1.0| | | | | |`cublasDasum_v2`| | | | |`hipblasDasum`|1.8.2| | | | |`rocblas_dasum`|1.5.0| | | | | @@ -278,9 +278,9 @@ |`cublasDscal_v2`| | | | |`hipblasDscal`|1.8.2| | | | |`rocblas_dscal`|1.5.0| | | | | |`cublasDscal_v2_64`|12.0| | | |`hipblasDscal_64`|6.1.0| | | | |`rocblas_dscal_64`|6.1.0| | | | | |`cublasDswap`| | | | |`hipblasDswap`|3.0.0| | | | |`rocblas_dswap`|1.5.0| | | | | -|`cublasDswap_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDswap_64`|12.0| | | |`hipblasDswap_64`|6.1.0| | | | |`rocblas_dswap_64`|6.1.0| | | | | |`cublasDswap_v2`| | | | |`hipblasDswap`|3.0.0| | | | |`rocblas_dswap`|1.5.0| | | | | -|`cublasDswap_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDswap_v2_64`|12.0| | | |`hipblasDswap_64`|6.1.0| | | | |`rocblas_dswap_64`|6.1.0| | | | | |`cublasDzasum`| | | | |`hipblasDzasum_v2`|6.0.0| | | | |`rocblas_dzasum`|1.5.0| | | | | |`cublasDzasum_64`|12.0| | | |`hipblasDzasum_v2_64`|6.1.0| | | | |`rocblas_dzasum_64`|6.1.0| | | | | |`cublasDzasum_v2`| | | | |`hipblasDzasum_v2`|6.0.0| | | | |`rocblas_dzasum`|1.5.0| | | | | @@ -368,9 +368,9 @@ |`cublasSscal_v2`| | | | |`hipblasSscal`|1.8.2| | | | |`rocblas_sscal`|1.5.0| | | | | |`cublasSscal_v2_64`|12.0| | | |`hipblasSscal_64`|6.1.0| | | | |`rocblas_sscal_64`|6.1.0| | | | | |`cublasSswap`| | | | |`hipblasSswap`|3.0.0| | | | |`rocblas_sswap`|1.5.0| | | | | -|`cublasSswap_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSswap_64`|12.0| | | |`hipblasSswap_64`|6.1.0| | | | |`rocblas_sswap_64`|6.1.0| | | | | |`cublasSswap_v2`| | | | |`hipblasSswap`|3.0.0| | | | |`rocblas_sswap`|1.5.0| | | | | -|`cublasSswap_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSswap_v2_64`|12.0| | | |`hipblasSswap_64`|6.1.0| | | | |`rocblas_sswap_64`|6.1.0| | | | | |`cublasZaxpy`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | |`rocblas_zaxpy`|1.5.0| | | | | |`cublasZaxpy_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | |`rocblas_zaxpy`|1.5.0| | | | | @@ -406,9 +406,9 @@ |`cublasZscal_v2`| | | | |`hipblasZscal_v2`|6.0.0| | | | |`rocblas_zscal`|1.5.0| | | | | |`cublasZscal_v2_64`|12.0| | | |`hipblasZscal_v2_64`|6.1.0| | | | |`rocblas_zscal_64`|6.1.0| | | | | |`cublasZswap`| | | | |`hipblasZswap_v2`|6.0.0| | | | |`rocblas_zswap`|1.5.0| | | | | -|`cublasZswap_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZswap_64`|12.0| | | |`hipblasZswap_v2_64`|6.1.0| | | | |`rocblas_zswap_64`|6.1.0| | | | | |`cublasZswap_v2`| | | | |`hipblasZswap_v2`|6.0.0| | | | |`rocblas_zswap`|1.5.0| | | | | -|`cublasZswap_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZswap_v2_64`|12.0| | | |`hipblasZswap_v2_64`|6.1.0| | | | |`rocblas_zswap_64`|6.1.0| | | | | ## **6. CUBLAS Level-2 Function Reference** diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 67d50069..de9a51eb 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -238,9 +238,9 @@ |`cublasCsscal_v2`| | | | |`rocblas_csscal`|3.5.0| | | | | |`cublasCsscal_v2_64`|12.0| | | |`rocblas_csscal_64`|6.1.0| | | | | |`cublasCswap`| | | | |`rocblas_cswap`|1.5.0| | | | | -|`cublasCswap_64`|12.0| | | | | | | | | | +|`cublasCswap_64`|12.0| | | |`rocblas_cswap_64`|6.1.0| | | | | |`cublasCswap_v2`| | | | |`rocblas_cswap`|1.5.0| | | | | -|`cublasCswap_v2_64`|12.0| | | | | | | | | | +|`cublasCswap_v2_64`|12.0| | | |`rocblas_cswap_64`|6.1.0| | | | | |`cublasDasum`| | | | |`rocblas_dasum`|1.5.0| | | | | |`cublasDasum_64`|12.0| | | |`rocblas_dasum_64`|6.1.0| | | | | |`cublasDasum_v2`| | | | |`rocblas_dasum`|1.5.0| | | | | @@ -278,9 +278,9 @@ |`cublasDscal_v2`| | | | |`rocblas_dscal`|1.5.0| | | | | |`cublasDscal_v2_64`|12.0| | | |`rocblas_dscal_64`|6.1.0| | | | | |`cublasDswap`| | | | |`rocblas_dswap`|1.5.0| | | | | -|`cublasDswap_64`|12.0| | | | | | | | | | +|`cublasDswap_64`|12.0| | | |`rocblas_dswap_64`|6.1.0| | | | | |`cublasDswap_v2`| | | | |`rocblas_dswap`|1.5.0| | | | | -|`cublasDswap_v2_64`|12.0| | | | | | | | | | +|`cublasDswap_v2_64`|12.0| | | |`rocblas_dswap_64`|6.1.0| | | | | |`cublasDzasum`| | | | |`rocblas_dzasum`|1.5.0| | | | | |`cublasDzasum_64`|12.0| | | |`rocblas_dzasum_64`|6.1.0| | | | | |`cublasDzasum_v2`| | | | |`rocblas_dzasum`|1.5.0| | | | | @@ -368,9 +368,9 @@ |`cublasSscal_v2`| | | | |`rocblas_sscal`|1.5.0| | | | | |`cublasSscal_v2_64`|12.0| | | |`rocblas_sscal_64`|6.1.0| | | | | |`cublasSswap`| | | | |`rocblas_sswap`|1.5.0| | | | | -|`cublasSswap_64`|12.0| | | | | | | | | | +|`cublasSswap_64`|12.0| | | |`rocblas_sswap_64`|6.1.0| | | | | |`cublasSswap_v2`| | | | |`rocblas_sswap`|1.5.0| | | | | -|`cublasSswap_v2_64`|12.0| | | | | | | | | | +|`cublasSswap_v2_64`|12.0| | | |`rocblas_sswap_64`|6.1.0| | | | | |`cublasZaxpy`| | | | |`rocblas_zaxpy`|1.5.0| | | | | |`cublasZaxpy_64`|12.0| | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`rocblas_zaxpy`|1.5.0| | | | | @@ -406,9 +406,9 @@ |`cublasZscal_v2`| | | | |`rocblas_zscal`|1.5.0| | | | | |`cublasZscal_v2_64`|12.0| | | |`rocblas_zscal_64`|6.1.0| | | | | |`cublasZswap`| | | | |`rocblas_zswap`|1.5.0| | | | | -|`cublasZswap_64`|12.0| | | | | | | | | | +|`cublasZswap_64`|12.0| | | |`rocblas_zswap_64`|6.1.0| | | | | |`cublasZswap_v2`| | | | |`rocblas_zswap`|1.5.0| | | | | -|`cublasZswap_v2_64`|12.0| | | | | | | | | | +|`cublasZswap_v2_64`|12.0| | | |`rocblas_zswap_64`|6.1.0| | | | | ## **6. CUBLAS Level-2 Function Reference** diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 5dbd8391..62c67f8b 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -150,13 +150,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SWAP {"cublasSswap", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSswap_64", {"hipblasSswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSswap_64", {"hipblasSswap_64", "rocblas_sswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDswap", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDswap_64", {"hipblasDswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDswap_64", {"hipblasDswap_64", "rocblas_dswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasCswap", {"hipblasCswap_v2", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCswap_64", {"hipblasCswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasCswap_64", {"hipblasCswap_v2_64", "rocblas_cswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasZswap", {"hipblasZswap_v2", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZswap_64", {"hipblasZswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasZswap_64", {"hipblasZswap_v2_64", "rocblas_zswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, // AMAX {"cublasIsamax", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, @@ -992,13 +992,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasSwapEx", {"hipblasSwapEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, {"cublasSwapEx_64", {"hipblasSwapEx_64", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, {"cublasSswap_v2", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasSswap_v2_64", {"hipblasSswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSswap_v2_64", {"hipblasSswap_64", "rocblas_sswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDswap_v2", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDswap_v2_64", {"hipblasDswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDswap_v2_64", {"hipblasDswap_64", "rocblas_dswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasCswap_v2", {"hipblasCswap_v2", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCswap_v2_64", {"hipblasCswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasCswap_v2_64", {"hipblasCswap_v2_64", "rocblas_cswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasZswap_v2", {"hipblasZswap_v2", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZswap_v2_64", {"hipblasZswap_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasZswap_v2_64", {"hipblasZswap_v2_64", "rocblas_zswap_64", CONV_LIB_FUNC, API_BLAS, 5}}, // AMAX {"cublasIamaxEx", {"hipblasIamaxEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, @@ -1915,6 +1915,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasCsscal_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasZscal_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasZdscal_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasSswap_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasDswap_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasCswap_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasZswap_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2195,6 +2199,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_csscal_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_zscal_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_zdscal_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_sswap_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_dswap_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_cswap_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_zswap_64", {HIP_6010, HIP_0, HIP_0, }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 3d16541d..77904d84 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2148,6 +2148,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasZdscal_v2_64(blasHandle, n_64, &da, &dcomplexx, incx_64); blasStatus = cublasZdscal_64(blasHandle, n_64, &da, &dcomplexx, incx_64); blasStatus = cublasZdscal_v2_64(blasHandle, n_64, &da, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2_64(cublasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSswap_64(hipblasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSswap_v2_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2_64(cublasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDswap_64(hipblasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDswap_v2_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2_64(cublasHandle_t handle, int64_t n, cuComplex* x, int64_t incx, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCswap_v2_64(hipblasHandle_t handle, int64_t n, hipComplex* x, int64_t incx, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCswap_v2_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCswap_v2_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCswap_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCswap_v2_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2_64(cublasHandle_t handle, int64_t n, cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZswap_v2_64(hipblasHandle_t handle, int64_t n, hipDoubleComplex* x, int64_t incx, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZswap_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZswap_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZswap_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZswap_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 7a015066..6bef54b6 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2233,6 +2233,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_zdscal_64(blasHandle, n_64, &da, &dcomplexx, incx_64); blasStatus = cublasZdscal_64(blasHandle, n_64, &da, &dcomplexx, incx_64); blasStatus = cublasZdscal_v2_64(blasHandle, n_64, &da, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2_64(cublasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sswap_64(rocblas_handle handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_sswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_sswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSswap_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSswap_v2_64(blasHandle, n_64, &fx, incx_64, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2_64(cublasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dswap_64(rocblas_handle handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_dswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_dswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDswap_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDswap_v2_64(blasHandle, n_64, &dx, incx_64, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2_64(cublasHandle_t handle, int64_t n, cuComplex* x, int64_t incx, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cswap_64(rocblas_handle handle, int64_t n, rocblas_float_complex* x, int64_t incx, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_cswap_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_cswap_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCswap_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCswap_v2_64(blasHandle, n_64, &complexx, incx_64, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2_64(cublasHandle_t handle, int64_t n, cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zswap_64(rocblas_handle handle, int64_t n, rocblas_double_complex* x, int64_t incx, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zswap_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zswap_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZswap_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZswap_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dcomplexy, incy_64); #endif return 0;