diff --git a/include/matx/operators/base_operator.h b/include/matx/operators/base_operator.h
index 23e2d4ac..3fa57b5c 100644
--- a/include/matx/operators/base_operator.h
+++ b/include/matx/operators/base_operator.h
@@ -73,7 +73,7 @@ namespace matx
           tp->Exec(ex);
         }
         else if constexpr (is_matx_set_op<T>()) {
-          if constexpr (static_cast<T*>(this)->IsTransformSet()) {
+          if constexpr (is_matx_transform_op<typename T::op_type>() && is_tensor_view_v<typename T::tensor_type>) {
             tp->TransformExec(tp->Shape(), ex);
           }
           else {
diff --git a/include/matx/operators/concat.h b/include/matx/operators/concat.h
index 50cd4636..f7429bc6 100644
--- a/include/matx/operators/concat.h
+++ b/include/matx/operators/concat.h
@@ -79,7 +79,7 @@ namespace matx
     {
       static_assert(RANK > 0, "Cannot concatenate rank-0 tensors");
       static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to concatenate");
-      static_assert((... && (RANK == ts.Rank())), "concatenated ops must have the same rank");
+      static_assert((... && (RANK == Ts::Rank())), "concatenated ops must have the same rank");

      for (int32_t i = 0; i < RANK; i++) {
        if(i == axis_) {
diff --git a/include/matx/operators/isclose.h b/include/matx/operators/isclose.h
index fc3f6917..53f8a3b7 100644
--- a/include/matx/operators/isclose.h
+++ b/include/matx/operators/isclose.h
@@ -55,7 +55,7 @@ namespace matx
       __MATX_INLINE__ IsCloseOp(const Op1 &op1, const Op2 &op2, double rtol, double atol) :
         op1_(op1), op2_(op2), rtol_(static_cast<inner_type>(rtol)), atol_(static_cast<inner_type>(atol))
       {
-        static_assert(op1.Rank() == op2.Rank(), "Operator ranks must match in isclose()");
+        static_assert(Op1::Rank() == Op2::Rank(), "Operator ranks must match in isclose()");
         ASSERT_COMPATIBLE_OP_SIZES(op1);
         ASSERT_COMPATIBLE_OP_SIZES(op2);
       }
diff --git a/include/matx/operators/stack.h b/include/matx/operators/stack.h
index 3353aeb3..6f0554b3 100644
--- a/include/matx/operators/stack.h
+++ b/include/matx/operators/stack.h
@@ -77,7 +77,7 @@ namespace matx
     __MATX_INLINE__ StackOp(int axis, const Ts&... ts) : ops_(ts...), axis_(axis)
     {
       static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to stack");
-      static_assert((... && (RANK == ts.Rank())), "stacked ops must have the same rank");
+      static_assert((... && (RANK == Ts::Rank())), "stacked ops must have the same rank");

      for (int32_t i = 0; i < RANK; i++) {
        MATX_ASSERT_STR(((ts.Size(i) == pp_get<0>(ts).Size(i)) && ...)
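Review note: the concat/stack/isclose hunks all fix the same pattern: a `static_assert` that reaches `Rank()` through a constructor parameter (`ts.Rank()`, `op1.Rank()`). Even though `Rank()` is a static `constexpr` member, referencing a function parameter inside a constant expression is rejected by some compilers (clang in particular), while naming the type (`Ts::Rank()`, `Op1::Rank()`) is portable. The base_operator.h change is the same flavor: `static_cast<T*>(this)->IsTransformSet()` is not a constant expression, so the condition is rebuilt from type traits directly. A minimal sketch of the pitfall with a stand-in `Op` type, not MatX code:

```cpp
// Minimal repro sketch (stand-in types, not MatX code): why Ts::Rank()
// is used inside static_assert instead of ts.Rank().
struct Op {
  static constexpr int Rank() { return 2; }  // static, so no object needed
};

template <typename... Ts>
void concat_like(const Ts &...ts) {
  // Portable: names only the types, no function parameters involved.
  static_assert((... && (Ts::Rank() == 2)), "ops must have the same rank");

  // Rejected by clang (function parameters are not usable in constant
  // expressions, even when the member called is static):
  // static_assert((... && (ts.Rank() == 2)), "ops must have the same rank");
  ((void)ts, ...);  // silence unused-parameter warnings
}

int main() {
  concat_like(Op{}, Op{});  // the rank check runs entirely at compile time
}
```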
diff --git a/include/matx/transforms/fft/fft_cuda.h b/include/matx/transforms/fft/fft_cuda.h
index 1a429a3d..d0491ba0 100644
--- a/include/matx/transforms/fft/fft_cuda.h
+++ b/include/matx/transforms/fft/fft_cuda.h
@@ -643,7 +643,7 @@ __MATX_INLINE__ auto getCufft1DSupportedTensor( const Op &in, cudaStream_t strea
 template <typename Op>
 __MATX_INLINE__ auto getCufft2DSupportedTensor( const Op &in, cudaStream_t stream) {
   // This would be better as a templated lambda, but we don't have those in C++17 yet
-  const auto support_func = [&in]() {
+  const auto support_func = [&]() {
     if constexpr (is_tensor_view_v<Op>) {
       if ( in.Stride(Op::Rank()-2) != in.Stride(Op::Rank()-1) * in.Size(Op::Rank()-1)) {
         return false;
diff --git a/include/matx/transforms/fft/fft_fftw.h b/include/matx/transforms/fft/fft_fftw.h
index ffe7c243..2c7643f0 100644
--- a/include/matx/transforms/fft/fft_fftw.h
+++ b/include/matx/transforms/fft/fft_fftw.h
@@ -500,7 +500,7 @@ template <typename T> class matxFFTWPlan_t {
 template <typename Op>
 __MATX_INLINE__ auto getFFTW1DSupportedTensor(const Op &in) {
   // This would be better as a templated lambda, but we don't have those in C++17 yet
-  const auto support_func = [&in]() {
+  const auto support_func = [&]() {
     if constexpr (is_tensor_view_v<Op>) {
       if constexpr (Op::Rank() >= 2) {
         if (in.Stride(Op::Rank() - 2) != in.Stride(Op::Rank() - 1) * in.Size(Op::Rank() - 1)) {
@@ -527,7 +527,7 @@ template <typename T> class matxFFTWPlan_t {
 template <typename Op>
 __MATX_INLINE__ auto getFFTW2DSupportedTensor( const Op &in) {
   // This would be better as a templated lambda, but we don't have those in C++17 yet
-  const auto support_func = [&in]() {
+  const auto support_func = [&]() {
     if constexpr (is_tensor_view_v<Op>) {
       if ( in.Stride(Op::Rank()-2) != in.Stride(Op::Rank()-1) * in.Size(Op::Rank()-1)) {
         return false;
diff --git a/include/matx/transforms/matmul/matmul_cblas.h b/include/matx/transforms/matmul/matmul_cblas.h
index c4b6eab3..e8e9a6aa 100644
--- a/include/matx/transforms/matmul/matmul_cblas.h
+++ b/include/matx/transforms/matmul/matmul_cblas.h
@@ -438,7 +438,7 @@ __MATX_INLINE__ void matmul_dispatch(TensorTypeC &c,
 template <typename Op>
 __MATX_INLINE__ auto getCBLASSupportedTensor( const Op &in) {
   // This would be better as a templated lambda, but we don't have those in C++17 yet
-  const auto support_func = [&in]() {
+  const auto support_func = [&]() {
     if constexpr (is_tensor_view_v<Op>) {
       return !(
         (in.Stride(Op::Rank() - 1) != (index_t)1 && in.Stride(Op::Rank() - 2) != (index_t)1) ||
diff --git a/include/matx/transforms/matmul/matmul_cuda.h b/include/matx/transforms/matmul/matmul_cuda.h
index 37d7bafe..b1e42bc1 100644
--- a/include/matx/transforms/matmul/matmul_cuda.h
+++ b/include/matx/transforms/matmul/matmul_cuda.h
@@ -1102,7 +1102,7 @@ using gemm_cuda_cache_t = std::unordered_map
 template <typename Op>
 __MATX_INLINE__ auto getCublasSupportedTensor( const Op &in, cudaStream_t stream) {
   // This would be better as a templated lambda, but we don't have those in C++17 yet
-  const auto support_func = [&in]() {
+  const auto support_func = [&]() {
     if constexpr (is_tensor_view_v<Op>) {
       return !(
         (in.Stride(Op::Rank()-1) != (index_t)1 && in.Stride(Op::Rank()-2) != (index_t)1) ||
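Review note: every `[&in]` → `[&]` change sits in a helper whose lambda branches on `if constexpr (is_tensor_view_v<Op>)`. With an explicit capture list, `in` is captured even when the instantiated branch never uses it, which trips clang's `-Wunused-lambda-capture` (fatal under `-Werror`); a by-reference default capture only captures what the surviving branch actually uses. A hedged sketch of the pitfall with stand-in names, not the MatX helpers:

```cpp
// Sketch of the capture pitfall the [&in] -> [&] change avoids.
#include <type_traits>

template <typename Op>
bool is_supported(const Op &in) {
  // With [&in], clang warns "lambda capture 'in' is not used" whenever the
  // instantiation discards the branch that reads it; [&] captures lazily.
  const auto support_func = [&]() {
    if constexpr (std::is_pointer_v<Op>) {
      return in != nullptr;  // 'in' is only used in this branch
    }
    else {
      return true;           // nothing is captured in this instantiation
    }
  };
  return support_func();
}

int main() {
  int x = 5;
  int *p = &x;
  return (is_supported(x) && is_supported(p)) ? 0 : 1;
}
```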
diff --git a/include/matx/transforms/svd/svd_cuda.h b/include/matx/transforms/svd/svd_cuda.h
index 9148b336..3c7b017a 100644
--- a/include/matx/transforms/svd/svd_cuda.h
+++ b/include/matx/transforms/svd/svd_cuda.h
@@ -551,13 +551,11 @@ static __MATX_INLINE__ SVDMethod GetCUDASVDMethod(const ATensor &a) {
   static constexpr int RANK = ATensor::Rank();
   index_t m = a.Size(RANK - 2);
   index_t n = a.Size(RANK - 1);
-  SVDMethod method;
-  // This assumes the matrix sizes are fairly large, in which case gesvd should win out on speed
-  if (a.Rank() == 2) {
-    method = detail::SVDMethod::GESVD;
-  }
-  else {
+  // gesvd is a good default for non-batched
+  SVDMethod method = detail::SVDMethod::GESVD;
+
+  if (a.Rank() != 2) {
     if (a.Size(RANK-2) <= 32 && a.Size(RANK-1) <= 32) {
       if constexpr (is_tensor_view_v<ATensor>) {
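Review note: the svd_cuda.h hunk replaces a declare-then-assign-in-every-branch pattern with a single initialized default (`GESVD`), so only the batched path has to override it and no code path can leave `method` unset. A simplified sketch of the resulting selection flow; the enum value `GESVD`, the rank test, and the 32x32 threshold come from the diff, while `GESVDJ_BATCHED` and the mock tensor are illustrative (the hunk is truncated before the batched branch's choice):

```cpp
// Simplified sketch of the post-change method selection (illustrative).
#include <cstdio>

enum class SVDMethod { GESVD, GESVDJ_BATCHED };

template <typename ATensor>
SVDMethod GetMethod(const ATensor &a) {
  constexpr int RANK = ATensor::Rank();
  // gesvd is a good default for non-batched input...
  SVDMethod method = SVDMethod::GESVD;
  // ...and only batched stacks of small matrices switch to a Jacobi variant.
  if (a.Rank() != 2 && a.Size(RANK - 2) <= 32 && a.Size(RANK - 1) <= 32) {
    method = SVDMethod::GESVDJ_BATCHED;
  }
  return method;
}

struct Batch8x8 {  // stand-in for a rank-3 batch of 8x8 matrices
  static constexpr int Rank() { return 3; }
  long long Size(int) const { return 8; }
};

int main() {
  Batch8x8 a;
  std::printf("batched small input -> %s\n",
              GetMethod(a) == SVDMethod::GESVDJ_BATCHED ? "gesvdjBatched" : "gesvd");
}
```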