From b4f18fd8b6b0affbbda8da17e28ca4af8bc62408 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 28 Aug 2023 18:56:32 +0200 Subject: [PATCH] [HIPIFY][#674][#837][rocSPARSE][feature] rocSPARSE support - Step 32 - functions + Implemented a new function call transformation type "additional non-const arg" (`e_add_var_argument`) + The variable for the adding function argument is a copy of another function call argument + The new transformation is tested on rocSPARSE functions `rocsparse_(s|d|c|z)gpsv_interleaved_batch`, where: - an additional argument rocsparse_int batch_stride is copied from the previous one: rocsparse_int batch_count; - it is how hipsparse(S|D|C|Z)gpsvInterleavedBatch calls rocsparse_(s|d|c|z)gpsv_interleaved_batch in its implementation; + Updated synthetic tests and the regenerated hipify-perl and SPARSE docs --- bin/hipify-perl | 4 + .../CUSPARSE_API_supported_by_HIP_and_ROC.md | 8 +- docs/tables/CUSPARSE_API_supported_by_ROC.md | 8 +- src/CUDA2HIP_SPARSE_API_functions.cpp | 16 ++-- src/CUDA2HIP_Scripting.h | 5 +- src/HipifyAction.cpp | 78 +++++++++++++++++-- .../synthetic/libraries/cusparse2rocsparse.cu | 47 ++++++++++- 7 files changed, 139 insertions(+), 27 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 2b4a30c2..8496c4d9 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1689,6 +1689,7 @@ sub rocSubstitutions { subst("cusparseCgebsr2gebsc_bufferSize", "rocsparse_cgebsr2gebsc_buffer_size", "library"); subst("cusparseCgebsr2gebsr", "rocsparse_cgebsr2gebsr", "library"); subst("cusparseCgebsr2gebsr_bufferSize", "rocsparse_cgebsr2gebsr_buffer_size", "library"); + subst("cusparseCgpsvInterleavedBatch", "rocsparse_cgpsv_interleaved_batch", "library"); subst("cusparseCnnz", "rocsparse_cnnz", "library"); subst("cusparseCnnz_compress", "rocsparse_cnnz_compress", "library"); subst("cusparseCooAoSGet", "rocsparse_coo_aos_get", "library"); @@ -1737,6 +1738,7 @@ sub rocSubstitutions { subst("cusparseDgebsr2gebsc_bufferSize", "rocsparse_dgebsr2gebsc_buffer_size", "library"); subst("cusparseDgebsr2gebsr", "rocsparse_dgebsr2gebsr", "library"); subst("cusparseDgebsr2gebsr_bufferSize", "rocsparse_dgebsr2gebsr_buffer_size", "library"); + subst("cusparseDgpsvInterleavedBatch", "rocsparse_dgpsv_interleaved_batch", "library"); subst("cusparseDnMatGet", "rocsparse_dnmat_get", "library"); subst("cusparseDnMatGetStridedBatch", "rocsparse_dnmat_get_strided_batch", "library"); subst("cusparseDnMatGetValues", "rocsparse_dnmat_get_values", "library"); @@ -1794,6 +1796,7 @@ sub rocSubstitutions { subst("cusparseSgebsr2gebsc_bufferSize", "rocsparse_sgebsr2gebsc_buffer_size", "library"); subst("cusparseSgebsr2gebsr", "rocsparse_sgebsr2gebsr", "library"); subst("cusparseSgebsr2gebsr_bufferSize", "rocsparse_sgebsr2gebsr_buffer_size", "library"); + subst("cusparseSgpsvInterleavedBatch", "rocsparse_sgpsv_interleaved_batch", "library"); subst("cusparseSnnz", "rocsparse_snnz", "library"); subst("cusparseSnnz_compress", "rocsparse_snnz_compress", "library"); subst("cusparseSpMV", "rocsparse_spmv", "library"); @@ -1850,6 +1853,7 @@ sub rocSubstitutions { subst("cusparseZgebsr2gebsc_bufferSize", "rocsparse_zgebsr2gebsc_buffer_size", "library"); subst("cusparseZgebsr2gebsr", "rocsparse_zgebsr2gebsr", "library"); subst("cusparseZgebsr2gebsr_bufferSize", "rocsparse_zgebsr2gebsr_buffer_size", "library"); + subst("cusparseZgpsvInterleavedBatch", "rocsparse_zgpsv_interleaved_batch", "library"); subst("cusparseZnnz", "rocsparse_znnz", "library"); subst("cusparseZnnz_compress", "rocsparse_znnz_compress", "library"); subst("cublas.h", "rocblas.h", "include_cuda_main_header"); diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md index c7a50c89..ae8014bc 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md @@ -489,7 +489,7 @@ |`cusparseCcsrilu02_bufferSize`| |12.2| |`hipsparseCcsrilu02_bufferSize`|3.1.0| | | | | | | | | |`cusparseCcsrilu02_bufferSizeExt`| |12.2| |`hipsparseCcsrilu02_bufferSizeExt`|3.1.0| | | | | | | | | |`cusparseCcsrilu02_numericBoost`| |12.2| |`hipsparseCcsrilu02_numericBoost`|3.10.0| | | | | | | | | -|`cusparseCgpsvInterleavedBatch`|9.2| | |`hipsparseCgpsvInterleavedBatch`|5.1.0| | | | | | | | | +|`cusparseCgpsvInterleavedBatch`|9.2| | |`hipsparseCgpsvInterleavedBatch`|5.1.0| | | |`rocsparse_cgpsv_interleaved_batch`|5.1.0| | | | |`cusparseCgpsvInterleavedBatch_bufferSizeExt`|9.2| | |`hipsparseCgpsvInterleavedBatch_bufferSizeExt`|5.1.0| | | | | | | | | |`cusparseCgtsv`| |10.2|11.0| | | | | | | | | | | |`cusparseCgtsv2`|9.0| | |`hipsparseCgtsv2`|4.3.0| | | | | | | | | @@ -523,7 +523,7 @@ |`cusparseDcsrilu02_bufferSize`| |12.2| |`hipsparseDcsrilu02_bufferSize`|1.9.2| | | | | | | | | |`cusparseDcsrilu02_bufferSizeExt`| |12.2| |`hipsparseDcsrilu02_bufferSizeExt`|1.9.2| | | | | | | | | |`cusparseDcsrilu02_numericBoost`| |12.2| |`hipsparseDcsrilu02_numericBoost`|3.10.0| | | | | | | | | -|`cusparseDgpsvInterleavedBatch`|9.2| | |`hipsparseDgpsvInterleavedBatch`|5.1.0| | | | | | | | | +|`cusparseDgpsvInterleavedBatch`|9.2| | |`hipsparseDgpsvInterleavedBatch`|5.1.0| | | |`rocsparse_dgpsv_interleaved_batch`|5.1.0| | | | |`cusparseDgpsvInterleavedBatch_bufferSizeExt`|9.2| | |`hipsparseDgpsvInterleavedBatch_bufferSizeExt`|5.1.0| | | | | | | | | |`cusparseDgtsv`| |10.2|11.0| | | | | | | | | | | |`cusparseDgtsv2`|9.0| | |`hipsparseDgtsv2`|4.3.0| | | | | | | | | @@ -556,7 +556,7 @@ |`cusparseScsrilu02_bufferSize`| |12.2| |`hipsparseScsrilu02_bufferSize`|1.9.2| | | | | | | | | |`cusparseScsrilu02_bufferSizeExt`| |12.2| |`hipsparseScsrilu02_bufferSizeExt`|1.9.2| | | | | | | | | |`cusparseScsrilu02_numericBoost`| |12.2| |`hipsparseScsrilu02_numericBoost`|3.10.0| | | | | | | | | -|`cusparseSgpsvInterleavedBatch`|9.2| | |`hipsparseSgpsvInterleavedBatch`|5.1.0| | | | | | | | | +|`cusparseSgpsvInterleavedBatch`|9.2| | |`hipsparseSgpsvInterleavedBatch`|5.1.0| | | |`rocsparse_sgpsv_interleaved_batch`|5.1.0| | | | |`cusparseSgpsvInterleavedBatch_bufferSizeExt`|9.2| | |`hipsparseSgpsvInterleavedBatch_bufferSizeExt`|5.1.0| | | | | | | | | |`cusparseSgtsv`| |10.2|11.0| | | | | | | | | | | |`cusparseSgtsv2`|9.0| | |`hipsparseSgtsv2`|4.3.0| | | | | | | | | @@ -593,7 +593,7 @@ |`cusparseZcsrilu02_bufferSize`| |12.2| |`hipsparseZcsrilu02_bufferSize`|3.1.0| | | | | | | | | |`cusparseZcsrilu02_bufferSizeExt`| |12.2| |`hipsparseZcsrilu02_bufferSizeExt`|3.1.0| | | | | | | | | |`cusparseZcsrilu02_numericBoost`| |12.2| |`hipsparseZcsrilu02_numericBoost`|3.10.0| | | | | | | | | -|`cusparseZgpsvInterleavedBatch`|9.2| | |`hipsparseZgpsvInterleavedBatch`|5.1.0| | | | | | | | | +|`cusparseZgpsvInterleavedBatch`|9.2| | |`hipsparseZgpsvInterleavedBatch`|5.1.0| | | |`rocsparse_zgpsv_interleaved_batch`|5.1.0| | | | |`cusparseZgpsvInterleavedBatch_bufferSizeExt`|9.2| | |`hipsparseZgpsvInterleavedBatch_bufferSizeExt`|5.1.0| | | | | | | | | |`cusparseZgtsv`| |10.2|11.0| | | | | | | | | | | |`cusparseZgtsv2`|9.0| | |`hipsparseZgtsv2`|4.3.0| | | | | | | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md index e935db68..2941477d 100644 --- a/docs/tables/CUSPARSE_API_supported_by_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md @@ -489,7 +489,7 @@ |`cusparseCcsrilu02_bufferSize`| |12.2| | | | | | | |`cusparseCcsrilu02_bufferSizeExt`| |12.2| | | | | | | |`cusparseCcsrilu02_numericBoost`| |12.2| | | | | | | -|`cusparseCgpsvInterleavedBatch`|9.2| | | | | | | | +|`cusparseCgpsvInterleavedBatch`|9.2| | |`rocsparse_cgpsv_interleaved_batch`|5.1.0| | | | |`cusparseCgpsvInterleavedBatch_bufferSizeExt`|9.2| | | | | | | | |`cusparseCgtsv`| |10.2|11.0| | | | | | |`cusparseCgtsv2`|9.0| | | | | | | | @@ -523,7 +523,7 @@ |`cusparseDcsrilu02_bufferSize`| |12.2| | | | | | | |`cusparseDcsrilu02_bufferSizeExt`| |12.2| | | | | | | |`cusparseDcsrilu02_numericBoost`| |12.2| | | | | | | -|`cusparseDgpsvInterleavedBatch`|9.2| | | | | | | | +|`cusparseDgpsvInterleavedBatch`|9.2| | |`rocsparse_dgpsv_interleaved_batch`|5.1.0| | | | |`cusparseDgpsvInterleavedBatch_bufferSizeExt`|9.2| | | | | | | | |`cusparseDgtsv`| |10.2|11.0| | | | | | |`cusparseDgtsv2`|9.0| | | | | | | | @@ -556,7 +556,7 @@ |`cusparseScsrilu02_bufferSize`| |12.2| | | | | | | |`cusparseScsrilu02_bufferSizeExt`| |12.2| | | | | | | |`cusparseScsrilu02_numericBoost`| |12.2| | | | | | | -|`cusparseSgpsvInterleavedBatch`|9.2| | | | | | | | +|`cusparseSgpsvInterleavedBatch`|9.2| | |`rocsparse_sgpsv_interleaved_batch`|5.1.0| | | | |`cusparseSgpsvInterleavedBatch_bufferSizeExt`|9.2| | | | | | | | |`cusparseSgtsv`| |10.2|11.0| | | | | | |`cusparseSgtsv2`|9.0| | | | | | | | @@ -593,7 +593,7 @@ |`cusparseZcsrilu02_bufferSize`| |12.2| | | | | | | |`cusparseZcsrilu02_bufferSizeExt`| |12.2| | | | | | | |`cusparseZcsrilu02_numericBoost`| |12.2| | | | | | | -|`cusparseZgpsvInterleavedBatch`|9.2| | | | | | | | +|`cusparseZgpsvInterleavedBatch`|9.2| | |`rocsparse_zgpsv_interleaved_batch`|5.1.0| | | | |`cusparseZgpsvInterleavedBatch_bufferSizeExt`|9.2| | | | | | | | |`cusparseZgtsv`| |10.2|11.0| | | | | | |`cusparseZgtsv2`|9.0| | | | | | | | diff --git a/src/CUDA2HIP_SPARSE_API_functions.cpp b/src/CUDA2HIP_SPARSE_API_functions.cpp index da4e4764..19b5ec1e 100644 --- a/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -365,6 +365,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseXbsric02_zeroPivot", {"hipsparseXbsric02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 12.2. Incomplete LU Factorization: level 0 + // NOTE: rocsparse_(s|d|c|z)csrilu0 have different signatures {"cusparseScsrilu0", {"hipsparseScsrilu0", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDcsrilu0", {"hipsparseDcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseCcsrilu0", {"hipsparseCcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -422,11 +423,13 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseXbsrilu02_zeroPivot", {"hipsparseXbsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 12.3. Tridiagonal Solve + // NOTE: rocsparse_(s|d|c|z)gtsv have an additional parameter void* temp_buffer {"cusparseSgtsv", {"hipsparseSgtsv", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDgtsv", {"hipsparseDgtsv", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseCgtsv", {"hipsparseCgtsv", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseZgtsv", {"hipsparseZgtsv", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_(s|d|c|z)gtsv_no_pivot have an additional parameter void* temp_buffer {"cusparseSgtsv_nopivot", {"hipsparseSgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDgtsv_nopivot", {"hipsparseDgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseCgtsv_nopivot", {"hipsparseCgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, 12, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -484,11 +487,10 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseCgpsvInterleavedBatch_bufferSizeExt", {"hipsparseCgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, {"cusparseZgpsvInterleavedBatch_bufferSizeExt", {"hipsparseZgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, - // NOTE: rocsparse_(s|d|c|z)gpsv_interleaved_batch have an additional parameter rocsparse_int batch_stride - {"cusparseSgpsvInterleavedBatch", {"hipsparseSgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, - {"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, - {"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, - {"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, 12, ROC_UNSUPPORTED}}, + {"cusparseSgpsvInterleavedBatch", {"hipsparseSgpsvInterleavedBatch", "rocsparse_sgpsv_interleaved_batch", CONV_LIB_FUNC, API_SPARSE, 12}}, + {"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", "rocsparse_dgpsv_interleaved_batch", CONV_LIB_FUNC, API_SPARSE, 12}}, + {"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", "rocsparse_cgpsv_interleaved_batch", CONV_LIB_FUNC, API_SPARSE, 12}}, + {"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", "rocsparse_zgpsv_interleaved_batch", CONV_LIB_FUNC, API_SPARSE, 12}}, // 13. cuSPARSE Matrix Reorderings Reference {"cusparseScsrcolor", {"hipsparseScsrcolor", "rocsparse_scsrcolor", CONV_LIB_FUNC, API_SPARSE, 13, CUDA_DEPRECATED}}, @@ -2133,6 +2135,10 @@ const std::map HIP_SPARSE_FUNCTION_VER_MAP { {"rocsparse_cnnz", {HIP_3020, HIP_0, HIP_0 }}, {"rocsparse_dnnz", {HIP_3020, HIP_0, HIP_0 }}, {"rocsparse_snnz", {HIP_3020, HIP_0, HIP_0 }}, + {"rocsparse_zgpsv_interleaved_batch", {HIP_5010, HIP_0, HIP_0 }}, + {"rocsparse_cgpsv_interleaved_batch", {HIP_5010, HIP_0, HIP_0 }}, + {"rocsparse_dgpsv_interleaved_batch", {HIP_5010, HIP_0, HIP_0 }}, + {"rocsparse_sgpsv_interleaved_batch", {HIP_5010, HIP_0, HIP_0 }}, }; const std::map CUDA_SPARSE_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Scripting.h b/src/CUDA2HIP_Scripting.h index ebbdb4f0..9f689a94 100644 --- a/src/CUDA2HIP_Scripting.h +++ b/src/CUDA2HIP_Scripting.h @@ -31,6 +31,7 @@ namespace hipify { e_int64_t, e_remove_argument, e_add_const_argument, + e_add_var_argument, e_move_argument, }; @@ -51,8 +52,8 @@ namespace hipify { CastTypes castType; CastWarning castWarn; std::string constValToAdd = ""; - unsigned moveTo = 0; - unsigned numberToMove = 1; + unsigned moveOrCopyTo = 0; + unsigned numberToMoveOrCopy = 1; }; typedef std::map ArgCastMap; diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 2c635d58..6485eb04 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -82,6 +82,10 @@ const std::string sCudnnSoftmaxBackward = "cudnnSoftmaxBackward"; const std::string sCudnnConvolutionForward = "cudnnConvolutionForward"; const std::string sCudnnConvolutionBackwardData = "cudnnConvolutionBackwardData"; const std::string sCudnnRNNBackwardWeights = "cudnnRNNBackwardWeights"; +const std::string sCusparseZgpsvInterleavedBatch = "cusparseZgpsvInterleavedBatch"; +const std::string sCusparseCgpsvInterleavedBatch = "cusparseCgpsvInterleavedBatch"; +const std::string sCusparseDgpsvInterleavedBatch = "cusparseDgpsvInterleavedBatch"; +const std::string sCusparseSgpsvInterleavedBatch = "cusparseSgpsvInterleavedBatch"; // CUDA_OVERLOADED const std::string sCudaEventCreate = "cudaEventCreate"; const std::string sCudaGraphInstantiate = "cudaGraphInstantiate"; @@ -104,6 +108,7 @@ std::string getCastType(hipify::CastTypes c) { case e_int64_t: return s_int64_t; case e_remove_argument: return ""; case e_add_const_argument: return ""; + case e_add_var_argument: return ""; case e_move_argument: return ""; default: return ""; } @@ -381,6 +386,42 @@ std::map FuncArgCasts { true } }, + {sCusparseZgpsvInterleavedBatch, + { + { + {9, {e_add_var_argument, cw_None, "", 10}} + }, + true, + false + } + }, + {sCusparseCgpsvInterleavedBatch, + { + { + {9, {e_add_var_argument, cw_None, "", 10}} + }, + true, + false + } + }, + {sCusparseDgpsvInterleavedBatch, + { + { + {9, {e_add_var_argument, cw_None, "", 10}} + }, + true, + false + } + }, + {sCusparseSgpsvInterleavedBatch, + { + { + {9, {e_add_var_argument, cw_None, "", 10}} + }, + true, + false + } + }, }; void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) { @@ -857,20 +898,20 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) std::string sArg; clang::SmallString<40> dst_XStr; llvm::raw_svector_ostream dst_OS(dst_XStr); - if (c.second.numberToMove > 1) { - if ((argNum + c.second.numberToMove - 1) >= call->getNumArgs()) + if (c.second.numberToMoveOrCopy > 1) { + if ((argNum + c.second.numberToMoveOrCopy - 1) >= call->getNumArgs()) continue; - sr = call->getArg(argNum + c.second.numberToMove - 1)->getSourceRange(); + sr = call->getArg(argNum + c.second.numberToMoveOrCopy - 1)->getSourceRange(); sr.setBegin(call->getArg(argNum)->getBeginLoc()); } sArg = readSourceText(*SM, sr).str(); - if (c.second.moveTo < call->getNumArgs()) + if (c.second.moveOrCopyTo < call->getNumArgs()) dst_OS << sArg << ", "; else dst_OS << ", " << sArg; clang::SourceLocation dst_s; - if (c.second.moveTo < call->getNumArgs()) - dst_s = call->getArg(c.second.moveTo)->getBeginLoc(); + if (c.second.moveOrCopyTo < call->getNumArgs()) + dst_s = call->getArg(c.second.moveOrCopyTo)->getBeginLoc(); else dst_s = call->getEndLoc(); ct::Replacement dst_Rep(*SM, dst_s, 0, dst_OS.str()); @@ -878,7 +919,7 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) insertReplacement(dst_Rep, dst_fullSL); OS << ""; if (argNum < call->getNumArgs()) - e = call->getArg(argNum + c.second.numberToMove)->getBeginLoc(); + e = call->getArg(argNum + c.second.numberToMoveOrCopy)->getBeginLoc(); else e = call->getEndLoc(); length = SM->getCharacterData(e) - SM->getCharacterData(s); @@ -892,6 +933,23 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) OS << ", " << c.second.constValToAdd; break; } + case e_add_var_argument: + { + if (argNum >= call->getNumArgs()) + continue; + sr = call->getArg(argNum)->getSourceRange(); + sr.setBegin(call->getArg(argNum)->getBeginLoc()); + std::string sArg = readSourceText(*SM, sr).str(); + if (c.second.moveOrCopyTo < call->getNumArgs()) { + OS << sArg << ", "; + s = call->getArg(c.second.moveOrCopyTo)->getBeginLoc(); + } + else { + OS << ", " << sArg; + s = call->getEndLoc(); + } + break; + } default: OS << getCastType(c.second.castType) << "(" << readSourceText(*SM, sr) << ")"; length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(e, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(s); @@ -1065,7 +1123,11 @@ std::unique_ptr HipifyAction::CreateASTConsumer(clang::Compi sCudnnSoftmaxBackward, sCudnnConvolutionForward, sCudnnConvolutionBackwardData, - sCudnnRNNBackwardWeights + sCudnnRNNBackwardWeights, + sCusparseZgpsvInterleavedBatch, + sCusparseCgpsvInterleavedBatch, + sCusparseDgpsvInterleavedBatch, + sCusparseSgpsvInterleavedBatch ) ) ) diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu index cd3708d3..e2080f33 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu @@ -230,19 +230,32 @@ int main() { float fbscVal = 0.f; double dA = 0.f; float fA = 0.f; + int algo = 0; + double dds = 0.f; + double ddl = 0.f; + double dd = 0.f; + double ddu = 0.f; + double ddw = 0.f; + double dx = 0.f; + float fds = 0.f; + float fdl = 0.f; + float fd = 0.f; + float fdu = 0.f; + float fdw = 0.f; + float fx = 0.f; // CHECK: rocsparse_mat_info prune_info; pruneInfo_t prune_info; // TODO: should be rocsparse_double_complex // TODO: add to TypeOverloads cuDoubleComplex -> rocsparse_double_complex under a new option --sparse - // CHECK: rocblas_double_complex dcomplex, dcomplexA, dComplexbsrSortedValA, dComplexbsrSortedValC, dComplexcsrSortedValA, dComplexcsrSortedValC, dcomplextol, dComplexbsrSortedVal, dComplexbscVal, dComplexcscSortedVal; - cuDoubleComplex dcomplex, dcomplexA, dComplexbsrSortedValA, dComplexbsrSortedValC, dComplexcsrSortedValA, dComplexcsrSortedValC, dcomplextol, dComplexbsrSortedVal, dComplexbscVal, dComplexcscSortedVal; + // CHECK: rocblas_double_complex dcomplex, dcomplexA, dComplexbsrSortedValA, dComplexbsrSortedValC, dComplexcsrSortedValA, dComplexcsrSortedValC, dcomplextol, dComplexbsrSortedVal, dComplexbscVal, dComplexcscSortedVal, dcomplexds, dcomplexdl, dcomplexd, dcomplexdu, dcomplexdw, dcomplexx; + cuDoubleComplex dcomplex, dcomplexA, dComplexbsrSortedValA, dComplexbsrSortedValC, dComplexcsrSortedValA, dComplexcsrSortedValC, dcomplextol, dComplexbsrSortedVal, dComplexbscVal, dComplexcscSortedVal, dcomplexds, dcomplexdl, dcomplexd, dcomplexdu, dcomplexdw, dcomplexx; // TODO: should be rocsparse_double_complex // TODO: add to TypeOverloads cuComplex -> rocsparse_float_complex under a new option --sparse - // CHECK: rocblas_float_complex complex, complexA, complexbsrValA, complexbsrSortedValC, complexcsrSortedValA, complexcsrSortedValC, complextol, complexbsrSortedVal, complexbscVal, complexcscSortedVal; - cuComplex complex, complexA, complexbsrValA, complexbsrSortedValC, complexcsrSortedValA, complexcsrSortedValC, complextol, complexbsrSortedVal, complexbscVal, complexcscSortedVal; + // CHECK: rocblas_float_complex complex, complexA, complexbsrValA, complexbsrSortedValC, complexcsrSortedValA, complexcsrSortedValC, complextol, complexbsrSortedVal, complexbscVal, complexcscSortedVal, complexds, complexdl, complexd, complexdu, complexdw, complexx; + cuComplex complex, complexA, complexbsrValA, complexbsrSortedValC, complexcsrSortedValA, complexcsrSortedValC, complextol, complexbsrSortedVal, complexbscVal, complexcscSortedVal, complexds, complexdl, complexd, complexdu, complexdw, complexx; // CHECK: rocsparse_operation opA, opB; cusparseOperation_t opA, opB; @@ -804,6 +817,32 @@ int main() { status_t = cusparseSpruneDense2csr_bufferSizeExt(handle_t, m, n, &fA, lda, &fthreshold, matDescr_C, &fcsrSortedValC, &csrRowPtrC, &csrColIndC, &bufferSize); #endif +#if CUDA_VERSION >= 9020 + // NOTE: An additional argument rocsparse_int batch_stride is added for the rocsparse_zgpsv_interleaved_batch function call: the argument is copied from the previous one: rocsparse_int batch_count. It is how hipsparseZgpsvInterleavedBatch calls rocsparse_zgpsv_interleaved_batch in its implementation. + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch(cusparseHandle_t handle, int algo, int m, cuDoubleComplex* ds, cuDoubleComplex* dl, cuDoubleComplex* d, cuDoubleComplex* du, cuDoubleComplex* dw, cuDoubleComplex* x, int batchCount, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_zgpsv_interleaved_batch(rocsparse_handle handle, rocsparse_gpsv_interleaved_alg alg, rocsparse_int m, rocsparse_double_complex* ds, rocsparse_double_complex* dl, rocsparse_double_complex* d, rocsparse_double_complex* du, rocsparse_double_complex* dw, rocsparse_double_complex* x, rocsparse_int batch_count, rocsparse_int batch_stride, void* temp_buffer); + // CHECK: status_t = rocsparse_zgpsv_interleaved_batch(handle_t, algo, m, &dcomplexds, &dcomplexdl, &dcomplexd, &dcomplexdu, &dcomplexdw, &dcomplexx, batchCount, batchCount, pBuffer); + status_t = cusparseZgpsvInterleavedBatch(handle_t, algo, m, &dcomplexds, &dcomplexdl, &dcomplexd, &dcomplexdu, &dcomplexdw, &dcomplexx, batchCount, pBuffer); + + // NOTE: An additional argument rocsparse_int batch_stride is added for the rocsparse_cgpsv_interleaved_batch function call: the argument is copied from the previous one: rocsparse_int batch_count. It is how hipsparseCgpsvInterleavedBatch calls rocsparse_cgpsv_interleaved_batch in its implementation. + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch(cusparseHandle_t handle, int algo, int m, cuComplex* ds, cuComplex* dl, cuComplex* d, cuComplex* du, cuComplex* dw, cuComplex* x, int batchCount, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cgpsv_interleaved_batch(rocsparse_handle handle, rocsparse_gpsv_interleaved_alg alg, rocsparse_int m, rocsparse_float_complex* ds, rocsparse_float_complex* dl, rocsparse_float_complex* d, rocsparse_float_complex* du, rocsparse_float_complex* dw, rocsparse_float_complex* x, rocsparse_int batch_count, rocsparse_int batch_stride, void* temp_buffer); + // CHECK: status_t = rocsparse_cgpsv_interleaved_batch(handle_t, algo, m, &complexds, &complexdl, &complexd, &complexdu, &complexdw, &complexx, batchCount, batchCount, pBuffer); + status_t = cusparseCgpsvInterleavedBatch(handle_t, algo, m, &complexds, &complexdl, &complexd, &complexdu, &complexdw, &complexx, batchCount, pBuffer); + + // NOTE: An additional argument rocsparse_int batch_stride is added for the rocsparse_dgpsv_interleaved_batch function call: the argument is copied from the previous one: rocsparse_int batch_count. It is how hipsparseDgpsvInterleavedBatch calls rocsparse_dgpsv_interleaved_batch in its implementation. + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch(cusparseHandle_t handle, int algo, int m, double* ds, double* dl, double* d, double* du, double* dw, double* x, int batchCount, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dgpsv_interleaved_batch(rocsparse_handle handle, rocsparse_gpsv_interleaved_alg alg, rocsparse_int m, double* ds, double* dl, double* d, double* du, double* dw, double* x, rocsparse_int batch_count, rocsparse_int batch_stride, void* temp_buffer); + // CHECK: status_t = rocsparse_dgpsv_interleaved_batch(handle_t, algo, m, &dds, &ddl, &dd, &ddu, &ddw, &dx, batchCount, batchCount, pBuffer); + status_t = cusparseDgpsvInterleavedBatch(handle_t, algo, m, &dds, &ddl, &dd, &ddu, &ddw, &dx, batchCount, pBuffer); + + // NOTE: An additional argument rocsparse_int batch_stride is added for the rocsparse_sgpsv_interleaved_batch function call: the argument is copied from the previous one: rocsparse_int batch_count. It is how hipsparseSgpsvInterleavedBatch calls rocsparse_sgpsv_interleaved_batch in its implementation. + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch(cusparseHandle_t handle, int algo, int m, float* ds, float* dl, float* d, float* du, float* dw, float* x, int batchCount, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_sgpsv_interleaved_batch(rocsparse_handle handle, rocsparse_gpsv_interleaved_alg alg, rocsparse_int m, float* ds, float* dl, float* d, float* du, float* dw, float* x, rocsparse_int batch_count, rocsparse_int batch_stride, void* temp_buffer); + // CHECK: status_t = rocsparse_sgpsv_interleaved_batch(handle_t, algo, m, &fds, &fdl, &fd, &fdu, &fdw, &fx, batchCount, batchCount, pBuffer); + status_t = cusparseSgpsvInterleavedBatch(handle_t, algo, m, &fds, &fdl, &fd, &fdu, &fdw, &fx, batchCount, pBuffer); +#endif + #if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: _rocsparse_spmat_descr *spMatDescr = nullptr; // CHECK-NEXT: rocsparse_spmat_descr spMatDescr_t, matC;