diff --git a/bin/hipify-perl b/bin/hipify-perl index 618a4bef..a9381412 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1782,7 +1782,9 @@ sub experimentalSubstitutions { sub rocSubstitutions { subst("cublasAxpyEx", "rocblas_axpy_ex", "library"); subst("cublasCaxpy", "rocblas_caxpy", "library"); + subst("cublasCaxpy_64", "rocblas_caxpy_64", "library"); subst("cublasCaxpy_v2", "rocblas_caxpy", "library"); + subst("cublasCaxpy_v2_64", "rocblas_caxpy_64", "library"); subst("cublasCcopy", "rocblas_ccopy", "library"); subst("cublasCcopy_v2", "rocblas_ccopy", "library"); subst("cublasCdgmm", "rocblas_cdgmm", "library"); @@ -1875,7 +1877,9 @@ sub rocSubstitutions { subst("cublasDasum_v2", "rocblas_dasum", "library"); subst("cublasDasum_v2_64", "rocblas_dasum_64", "library"); subst("cublasDaxpy", "rocblas_daxpy", "library"); + subst("cublasDaxpy_64", "rocblas_daxpy_64", "library"); subst("cublasDaxpy_v2", "rocblas_daxpy", "library"); + subst("cublasDaxpy_v2_64", "rocblas_daxpy_64", "library"); subst("cublasDcopy", "rocblas_dcopy", "library"); subst("cublasDcopy_v2", "rocblas_dcopy", "library"); subst("cublasDdgmm", "rocblas_ddgmm", "library"); @@ -2015,7 +2019,9 @@ sub rocSubstitutions { subst("cublasSasum_v2", "rocblas_sasum", "library"); subst("cublasSasum_v2_64", "rocblas_sasum_64", "library"); subst("cublasSaxpy", "rocblas_saxpy", "library"); + subst("cublasSaxpy_64", "rocblas_saxpy_64", "library"); subst("cublasSaxpy_v2", "rocblas_saxpy", "library"); + subst("cublasSaxpy_v2_64", "rocblas_saxpy_64", "library"); subst("cublasScalEx", "rocblas_scal_ex", "library"); subst("cublasScasum", "rocblas_scasum", "library"); subst("cublasScasum_64", "rocblas_scasum_64", "library"); @@ -2106,7 +2112,9 @@ sub rocSubstitutions { subst("cublasTSTgemvBatched", "rocblas_tstgemv_batched", "library"); subst("cublasTSTgemvStridedBatched", "rocblas_tstgemv_strided_batched", "library"); subst("cublasZaxpy", "rocblas_zaxpy", "library"); + subst("cublasZaxpy_64", "rocblas_zaxpy_64", "library"); subst("cublasZaxpy_v2", "rocblas_zaxpy", "library"); + subst("cublasZaxpy_v2_64", "rocblas_zaxpy_64", "library"); subst("cublasZcopy", "rocblas_zcopy", "library"); subst("cublasZcopy_v2", "rocblas_zcopy", "library"); subst("cublasZdgmm", "rocblas_zdgmm", "library"); @@ -3724,7 +3732,9 @@ sub simpleSubstitutions { subst("make_cuFloatComplex", "make_hipFloatComplex", "complex"); subst("cublasAxpyEx", "hipblasAxpyEx_v2", "library"); subst("cublasCaxpy", "hipblasCaxpy_v2", "library"); + subst("cublasCaxpy_64", "hipblasCaxpy_v2_64", "library"); subst("cublasCaxpy_v2", "hipblasCaxpy_v2", "library"); + subst("cublasCaxpy_v2_64", "hipblasCaxpy_v2_64", "library"); subst("cublasCcopy", "hipblasCcopy_v2", "library"); subst("cublasCcopy_v2", "hipblasCcopy_v2", "library"); subst("cublasCdgmm", "hipblasCdgmm_v2", "library"); @@ -3822,7 +3832,9 @@ sub simpleSubstitutions { subst("cublasDasum_v2", "hipblasDasum", "library"); subst("cublasDasum_v2_64", "hipblasDasum_64", "library"); subst("cublasDaxpy", "hipblasDaxpy", "library"); + subst("cublasDaxpy_64", "hipblasDaxpy_64", "library"); subst("cublasDaxpy_v2", "hipblasDaxpy", "library"); + subst("cublasDaxpy_v2_64", "hipblasDaxpy_64", "library"); subst("cublasDcopy", "hipblasDcopy", "library"); subst("cublasDcopy_v2", "hipblasDcopy", "library"); subst("cublasDdgmm", "hipblasDdgmm", "library"); @@ -3961,7 +3973,9 @@ sub simpleSubstitutions { subst("cublasSasum_v2", "hipblasSasum", "library"); subst("cublasSasum_v2_64", "hipblasSasum_64", "library"); subst("cublasSaxpy", "hipblasSaxpy", "library"); + subst("cublasSaxpy_64", "hipblasSaxpy_64", "library"); subst("cublasSaxpy_v2", "hipblasSaxpy", "library"); + subst("cublasSaxpy_v2_64", "hipblasSaxpy_64", "library"); subst("cublasScalEx", "hipblasScalEx_v2", "library"); subst("cublasScasum", "hipblasScasum_v2", "library"); subst("cublasScasum_64", "hipblasScasum_v2_64", "library"); @@ -4053,7 +4067,9 @@ sub simpleSubstitutions { subst("cublasStrsv", "hipblasStrsv", "library"); subst("cublasStrsv_v2", "hipblasStrsv", "library"); subst("cublasZaxpy", "hipblasZaxpy_v2", "library"); + subst("cublasZaxpy_64", "hipblasZaxpy_v2_64", "library"); subst("cublasZaxpy_v2", "hipblasZaxpy_v2", "library"); + subst("cublasZaxpy_v2_64", "hipblasZaxpy_v2_64", "library"); subst("cublasZcopy", "hipblasZcopy_v2", "library"); subst("cublasZcopy_v2", "hipblasZcopy_v2", "library"); subst("cublasZdgmm", "hipblasZdgmm_v2", "library"); @@ -10786,8 +10802,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZdgmm_64", "cublasZcopy_v2_64", "cublasZcopy_64", - "cublasZaxpy_v2_64", - "cublasZaxpy_64", "cublasXerbla", "cublasUint8gemmBias", "cublasTSTgemvStridedBatched_64", @@ -10884,8 +10898,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasScnrm2_v2_64", "cublasScnrm2_64", "cublasScalEx_64", - "cublasSaxpy_v2_64", - "cublasSaxpy_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -11003,8 +11015,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDdgmm_64", "cublasDcopy_v2_64", "cublasDcopy_64", - "cublasDaxpy_v2_64", - "cublasDaxpy_64", "cublasCtrttp", "cublasCtrsv_v2_64", "cublasCtrsv_64", @@ -11112,8 +11122,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCdgmm_64", "cublasCcopy_v2_64", "cublasCcopy_64", - "cublasCaxpy_v2_64", - "cublasCaxpy_64", "cublasAxpyEx_64", "cublasAsumEx_64", "cublasAsumEx", @@ -11283,8 +11291,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZdgmm_64", "cublasZcopy_v2_64", "cublasZcopy_64", - "cublasZaxpy_v2_64", - "cublasZaxpy_64", "cublasXerbla", "cublasUint8gemmBias", "cublasTSTgemvStridedBatched_64", @@ -11382,8 +11388,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasScnrm2_v2_64", "cublasScnrm2_64", "cublasScalEx_64", - "cublasSaxpy_v2_64", - "cublasSaxpy_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", @@ -11500,8 +11504,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDdgmm_64", "cublasDcopy_v2_64", "cublasDcopy_64", - "cublasDaxpy_v2_64", - "cublasDaxpy_64", "cublasCtrttp", "cublasCtrsv_v2_64", "cublasCtrsv_64", @@ -11613,8 +11615,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCdgmm_64", "cublasCcopy_v2_64", "cublasCcopy_64", - "cublasCaxpy_v2_64", - "cublasCaxpy_64", "cublasAxpyEx_64", "cublasAsumEx_64", "cublasAsumEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 0f680b34..062bbfc5 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -204,9 +204,9 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCaxpy`| | | | |`hipblasCaxpy_v2`|6.0.0| | | | | -|`cublasCaxpy_64`|12.0| | | | | | | | | | +|`cublasCaxpy_64`|12.0| | | |`hipblasCaxpy_v2_64`|6.1.0| | | | | |`cublasCaxpy_v2`| | | | |`hipblasCaxpy_v2`|6.0.0| | | | | -|`cublasCaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasCaxpy_v2_64`|12.0| | | |`hipblasCaxpy_v2_64`|6.1.0| | | | | |`cublasCcopy`| | | | |`hipblasCcopy_v2`|6.0.0| | | | | |`cublasCcopy_64`|12.0| | | | | | | | | | |`cublasCcopy_v2`| | | | |`hipblasCcopy_v2`|6.0.0| | | | | @@ -246,9 +246,9 @@ |`cublasDasum_v2`| | | | |`hipblasDasum`|1.8.2| | | | | |`cublasDasum_v2_64`|12.0| | | |`hipblasDasum_64`|6.1.0| | | | | |`cublasDaxpy`| | | | |`hipblasDaxpy`|1.8.2| | | | | -|`cublasDaxpy_64`|12.0| | | | | | | | | | +|`cublasDaxpy_64`|12.0| | | |`hipblasDaxpy_64`|6.1.0| | | | | |`cublasDaxpy_v2`| | | | |`hipblasDaxpy`|1.8.2| | | | | -|`cublasDaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasDaxpy_v2_64`|12.0| | | |`hipblasDaxpy_64`|6.1.0| | | | | |`cublasDcopy`| | | | |`hipblasDcopy`|1.8.2| | | | | |`cublasDcopy_64`|12.0| | | | | | | | | | |`cublasDcopy_v2`| | | | |`hipblasDcopy`|1.8.2| | | | | @@ -328,9 +328,9 @@ |`cublasSasum_v2`| | | | |`hipblasSasum`|1.8.2| | | | | |`cublasSasum_v2_64`|12.0| | | |`hipblasSasum_64`|6.1.0| | | | | |`cublasSaxpy`| | | | |`hipblasSaxpy`|1.8.2| | | | | -|`cublasSaxpy_64`|12.0| | | | | | | | | | +|`cublasSaxpy_64`|12.0| | | |`hipblasSaxpy_64`|6.1.0| | | | | |`cublasSaxpy_v2`| | | | |`hipblasSaxpy`|1.8.2| | | | | -|`cublasSaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasSaxpy_v2_64`|12.0| | | |`hipblasSaxpy_64`|6.1.0| | | | | |`cublasScasum`| | | | |`hipblasScasum_v2`|6.0.0| | | | | |`cublasScasum_64`|12.0| | | |`hipblasScasum_v2_64`|6.1.0| | | | | |`cublasScasum_v2`| | | | |`hipblasScasum_v2`|6.0.0| | | | | @@ -372,9 +372,9 @@ |`cublasSswap_v2`| | | | |`hipblasSswap`|3.0.0| | | | | |`cublasSswap_v2_64`|12.0| | | | | | | | | | |`cublasZaxpy`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | | -|`cublasZaxpy_64`|12.0| | | | | | | | | | +|`cublasZaxpy_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | | -|`cublasZaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasZaxpy_v2_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | | |`cublasZcopy`| | | | |`hipblasZcopy_v2`|6.0.0| | | | | |`cublasZcopy_64`|12.0| | | | | | | | | | |`cublasZcopy_v2`| | | | |`hipblasZcopy_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index c2d89766..7389e0ee 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -204,9 +204,9 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCaxpy`| | | | |`hipblasCaxpy_v2`|6.0.0| | | | |`rocblas_caxpy`|1.5.0| | | | | -|`cublasCaxpy_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCaxpy_64`|12.0| | | |`hipblasCaxpy_v2_64`|6.1.0| | | | |`rocblas_caxpy_64`|6.1.0| | | | | |`cublasCaxpy_v2`| | | | |`hipblasCaxpy_v2`|6.0.0| | | | |`rocblas_caxpy`|1.5.0| | | | | -|`cublasCaxpy_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCaxpy_v2_64`|12.0| | | |`hipblasCaxpy_v2_64`|6.1.0| | | | |`rocblas_caxpy_64`|6.1.0| | | | | |`cublasCcopy`| | | | |`hipblasCcopy_v2`|6.0.0| | | | |`rocblas_ccopy`|1.5.0| | | | | |`cublasCcopy_64`|12.0| | | | | | | | | | | | | | | | |`cublasCcopy_v2`| | | | |`hipblasCcopy_v2`|6.0.0| | | | |`rocblas_ccopy`|1.5.0| | | | | @@ -246,9 +246,9 @@ |`cublasDasum_v2`| | | | |`hipblasDasum`|1.8.2| | | | |`rocblas_dasum`|1.5.0| | | | | |`cublasDasum_v2_64`|12.0| | | |`hipblasDasum_64`|6.1.0| | | | |`rocblas_dasum_64`|6.1.0| | | | | |`cublasDaxpy`| | | | |`hipblasDaxpy`|1.8.2| | | | |`rocblas_daxpy`|1.5.0| | | | | -|`cublasDaxpy_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDaxpy_64`|12.0| | | |`hipblasDaxpy_64`|6.1.0| | | | |`rocblas_daxpy_64`|6.1.0| | | | | |`cublasDaxpy_v2`| | | | |`hipblasDaxpy`|1.8.2| | | | |`rocblas_daxpy`|1.5.0| | | | | -|`cublasDaxpy_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDaxpy_v2_64`|12.0| | | |`hipblasDaxpy_64`|6.1.0| | | | |`rocblas_daxpy_64`|6.1.0| | | | | |`cublasDcopy`| | | | |`hipblasDcopy`|1.8.2| | | | |`rocblas_dcopy`|1.5.0| | | | | |`cublasDcopy_64`|12.0| | | | | | | | | | | | | | | | |`cublasDcopy_v2`| | | | |`hipblasDcopy`|1.8.2| | | | |`rocblas_dcopy`|1.5.0| | | | | @@ -328,9 +328,9 @@ |`cublasSasum_v2`| | | | |`hipblasSasum`|1.8.2| | | | |`rocblas_sasum`|1.5.0| | | | | |`cublasSasum_v2_64`|12.0| | | |`hipblasSasum_64`|6.1.0| | | | |`rocblas_sasum_64`|6.1.0| | | | | |`cublasSaxpy`| | | | |`hipblasSaxpy`|1.8.2| | | | |`rocblas_saxpy`|1.5.0| | | | | -|`cublasSaxpy_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSaxpy_64`|12.0| | | |`hipblasSaxpy_64`|6.1.0| | | | |`rocblas_saxpy_64`|6.1.0| | | | | |`cublasSaxpy_v2`| | | | |`hipblasSaxpy`|1.8.2| | | | |`rocblas_saxpy`|1.5.0| | | | | -|`cublasSaxpy_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSaxpy_v2_64`|12.0| | | |`hipblasSaxpy_64`|6.1.0| | | | |`rocblas_saxpy_64`|6.1.0| | | | | |`cublasScasum`| | | | |`hipblasScasum_v2`|6.0.0| | | | |`rocblas_scasum`|1.5.0| | | | | |`cublasScasum_64`|12.0| | | |`hipblasScasum_v2_64`|6.1.0| | | | |`rocblas_scasum_64`|6.1.0| | | | | |`cublasScasum_v2`| | | | |`hipblasScasum_v2`|6.0.0| | | | |`rocblas_scasum`|1.5.0| | | | | @@ -372,9 +372,9 @@ |`cublasSswap_v2`| | | | |`hipblasSswap`|3.0.0| | | | |`rocblas_sswap`|1.5.0| | | | | |`cublasSswap_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZaxpy`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | |`rocblas_zaxpy`|1.5.0| | | | | -|`cublasZaxpy_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZaxpy_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`hipblasZaxpy_v2`|6.0.0| | | | |`rocblas_zaxpy`|1.5.0| | | | | -|`cublasZaxpy_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZaxpy_v2_64`|12.0| | | |`hipblasZaxpy_v2_64`|6.1.0| | | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZcopy`| | | | |`hipblasZcopy_v2`|6.0.0| | | | |`rocblas_zcopy`|1.5.0| | | | | |`cublasZcopy_64`|12.0| | | | | | | | | | | | | | | | |`cublasZcopy_v2`| | | | |`hipblasZcopy_v2`|6.0.0| | | | |`rocblas_zcopy`|1.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index d0597b9a..bf99f412 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -204,9 +204,9 @@ |**CUDA**|**A**|**D**|**C**|**R**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCaxpy`| | | | |`rocblas_caxpy`|1.5.0| | | | | -|`cublasCaxpy_64`|12.0| | | | | | | | | | +|`cublasCaxpy_64`|12.0| | | |`rocblas_caxpy_64`|6.1.0| | | | | |`cublasCaxpy_v2`| | | | |`rocblas_caxpy`|1.5.0| | | | | -|`cublasCaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasCaxpy_v2_64`|12.0| | | |`rocblas_caxpy_64`|6.1.0| | | | | |`cublasCcopy`| | | | |`rocblas_ccopy`|1.5.0| | | | | |`cublasCcopy_64`|12.0| | | | | | | | | | |`cublasCcopy_v2`| | | | |`rocblas_ccopy`|1.5.0| | | | | @@ -246,9 +246,9 @@ |`cublasDasum_v2`| | | | |`rocblas_dasum`|1.5.0| | | | | |`cublasDasum_v2_64`|12.0| | | |`rocblas_dasum_64`|6.1.0| | | | | |`cublasDaxpy`| | | | |`rocblas_daxpy`|1.5.0| | | | | -|`cublasDaxpy_64`|12.0| | | | | | | | | | +|`cublasDaxpy_64`|12.0| | | |`rocblas_daxpy_64`|6.1.0| | | | | |`cublasDaxpy_v2`| | | | |`rocblas_daxpy`|1.5.0| | | | | -|`cublasDaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasDaxpy_v2_64`|12.0| | | |`rocblas_daxpy_64`|6.1.0| | | | | |`cublasDcopy`| | | | |`rocblas_dcopy`|1.5.0| | | | | |`cublasDcopy_64`|12.0| | | | | | | | | | |`cublasDcopy_v2`| | | | |`rocblas_dcopy`|1.5.0| | | | | @@ -328,9 +328,9 @@ |`cublasSasum_v2`| | | | |`rocblas_sasum`|1.5.0| | | | | |`cublasSasum_v2_64`|12.0| | | |`rocblas_sasum_64`|6.1.0| | | | | |`cublasSaxpy`| | | | |`rocblas_saxpy`|1.5.0| | | | | -|`cublasSaxpy_64`|12.0| | | | | | | | | | +|`cublasSaxpy_64`|12.0| | | |`rocblas_saxpy_64`|6.1.0| | | | | |`cublasSaxpy_v2`| | | | |`rocblas_saxpy`|1.5.0| | | | | -|`cublasSaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasSaxpy_v2_64`|12.0| | | |`rocblas_saxpy_64`|6.1.0| | | | | |`cublasScasum`| | | | |`rocblas_scasum`|1.5.0| | | | | |`cublasScasum_64`|12.0| | | |`rocblas_scasum_64`|6.1.0| | | | | |`cublasScasum_v2`| | | | |`rocblas_scasum`|1.5.0| | | | | @@ -372,9 +372,9 @@ |`cublasSswap_v2`| | | | |`rocblas_sswap`|1.5.0| | | | | |`cublasSswap_v2_64`|12.0| | | | | | | | | | |`cublasZaxpy`| | | | |`rocblas_zaxpy`|1.5.0| | | | | -|`cublasZaxpy_64`|12.0| | | | | | | | | | +|`cublasZaxpy_64`|12.0| | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZaxpy_v2`| | | | |`rocblas_zaxpy`|1.5.0| | | | | -|`cublasZaxpy_v2_64`|12.0| | | | | | | | | | +|`cublasZaxpy_v2_64`|12.0| | | |`rocblas_zaxpy_64`|6.1.0| | | | | |`cublasZcopy`| | | | |`rocblas_zcopy`|1.5.0| | | | | |`cublasZcopy_64`|12.0| | | | | | | | | | |`cublasZcopy_v2`| | | | |`rocblas_zcopy`|1.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index b9738114..797a833d 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -130,13 +130,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // AXPY {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSaxpy_64", {"hipblasSaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSaxpy_64", {"hipblasSaxpy_64", "rocblas_saxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDaxpy", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDaxpy_64", {"hipblasDaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDaxpy_64", {"hipblasDaxpy_64", "rocblas_daxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasCaxpy", {"hipblasCaxpy_v2", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCaxpy_64", {"hipblasCaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasCaxpy_64", {"hipblasCaxpy_v2_64", "rocblas_caxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasZaxpy", {"hipblasZaxpy_v2", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZaxpy_64", {"hipblasZaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasZaxpy_64", {"hipblasZaxpy_v2_64", "rocblas_zaxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, // COPY {"cublasScopy", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, @@ -968,13 +968,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasAxpyEx", {"hipblasAxpyEx_v2", "rocblas_axpy_ex", CONV_LIB_FUNC, API_BLAS, 8}}, {"cublasAxpyEx_64", {"hipblasAxpyEx_64", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, {"cublasSaxpy_v2", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasSaxpy_v2_64", {"hipblasSaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasSaxpy_v2_64", {"hipblasSaxpy_64", "rocblas_saxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasDaxpy_v2", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDaxpy_v2_64", {"hipblasDaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasDaxpy_v2_64", {"hipblasDaxpy_64", "rocblas_daxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasCaxpy_v2", {"hipblasCaxpy_v2", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCaxpy_v2_64", {"hipblasCaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasCaxpy_v2_64", {"hipblasCaxpy_v2_64", "rocblas_caxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, {"cublasZaxpy_v2", {"hipblasZaxpy_v2", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZaxpy_v2_64", {"hipblasZaxpy_64", "", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + {"cublasZaxpy_v2_64", {"hipblasZaxpy_v2_64", "rocblas_zaxpy_64", CONV_LIB_FUNC, API_BLAS, 5}}, // COPY {"cublasCopyEx", {"hipblasCopyEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, @@ -1883,6 +1883,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDasum_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasScasum_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"hipblasDzasum_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasSaxpy_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasDaxpy_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasCaxpy_v2_64", {HIP_6010, HIP_0, HIP_0, }}, + {"hipblasZaxpy_v2_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2131,6 +2135,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dasum_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_scasum_64", {HIP_6010, HIP_0, HIP_0, }}, {"rocblas_dzasum_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_saxpy_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_daxpy_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_caxpy_64", {HIP_6010, HIP_0, HIP_0, }}, + {"rocblas_zaxpy_64", {HIP_6010, HIP_0, HIP_0, }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index cf0cc658..75fc404b 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -1922,6 +1922,34 @@ int main() { // CHECK-NEXT: blasStatus = hipblasDzasum_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); blasStatus = cublasDzasum_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); blasStatus = cublasDzasum_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2_64(cublasHandle_t handle, int64_t n, const float* alpha, const float* x, int64_t incx, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpy_64(hipblasHandle_t handle, int64_t n, const float* alpha, const float* x, int64_t incx, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSaxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSaxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSaxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSaxpy_v2_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2_64(cublasHandle_t handle, int64_t n, const double* alpha, const double* x, int64_t incx, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpy_64(hipblasHandle_t handle, int64_t n, const double* alpha, const double* x, int64_t incx, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDaxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDaxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDaxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDaxpy_v2_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpy_v2_64(hipblasHandle_t handle, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCaxpy_v2_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCaxpy_v2_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCaxpy_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCaxpy_v2_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpy_v2_64(hipblasHandle_t handle, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZaxpy_v2_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZaxpy_v2_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZaxpy_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZaxpy_v2_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 2bf02ace..f7284a81 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -2007,6 +2007,34 @@ int main() { // CHECK-NEXT: blasStatus = rocblas_dzasum_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); blasStatus = cublasDzasum_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); blasStatus = cublasDzasum_v2_64(blasHandle, n_64, &dcomplexx, incx_64, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2_64(cublasHandle_t handle, int64_t n, const float* alpha, const float* x, int64_t incx, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_saxpy_64(rocblas_handle handle, int64_t n, const float* alpha, const float* x, int64_t incx, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_saxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_saxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSaxpy_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + blasStatus = cublasSaxpy_v2_64(blasHandle, n_64, &fa, &fx, incx_64, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2_64(cublasHandle_t handle, int64_t n, const double* alpha, const double* x, int64_t incx, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_daxpy_64(rocblas_handle handle, int64_t n, const double* alpha, const double* x, int64_t incx, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_daxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_daxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDaxpy_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + blasStatus = cublasDaxpy_v2_64(blasHandle, n_64, &da, &dx, incx_64, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_caxpy_64(rocblas_handle handle, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_caxpy_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_caxpy_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCaxpy_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + blasStatus = cublasCaxpy_v2_64(blasHandle, n_64, &complexa, &complexx, incx_64, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zaxpy_64(rocblas_handle handle, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zaxpy_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zaxpy_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZaxpy_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); + blasStatus = cublasZaxpy_v2_64(blasHandle, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64); #endif return 0;