From 29887ce6fe0faae2c26ea48834b7ddd9e941ac99 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 1 Oct 2024 16:08:34 -0600 Subject: [PATCH 01/10] Add LINALG_FIX_RANK_UPDATES CMake option & macro This is for the implementation of P3371. It's OFF by default. Nothing currently uses the macro. --- CMakeLists.txt | 2 ++ include/experimental/__p1673_bits/linalg_config.h.in | 1 + 2 files changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d335c2ac..9e032923 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,8 @@ option(LINALG_FIX_TRANSPOSED_FOR_PADDED_LAYOUTS "Enable implementation of P3222 option(LINALG_FIX_CONJUGATED_FOR_NONCOMPLEX "Enable implementation of P3050 (Fix conjugated for noncomplex value types). OFF by default, though this will change if P3050 is voted into the C++ Standard Working Draft." OFF) +option(LINALG_FIX_RANK_UPDATES "Enable implementation of P3371 (Fix C++26 by making the rank-1, rank-2, rank-k, and rank-2k updates consistent with the BLAS). OFF by default, though this will change if P3371 is voted into the C++ Standard Working Draft." OFF) + ################################################################################ # Decide on the standard to use diff --git a/include/experimental/__p1673_bits/linalg_config.h.in b/include/experimental/__p1673_bits/linalg_config.h.in index 6dd1854a..81c72854 100644 --- a/include/experimental/__p1673_bits/linalg_config.h.in +++ b/include/experimental/__p1673_bits/linalg_config.h.in @@ -7,4 +7,5 @@ #cmakedefine LINALG_ENABLE_KOKKOS_DEFAULT #cmakedefine LINALG_ENABLE_TBB #cmakedefine LINALG_FIX_CONJUGATED_FOR_NONCOMPLEX +#cmakedefine LINALG_FIX_RANK_UPDATES #cmakedefine LINALG_FIX_TRANSPOSED_FOR_PADDED_LAYOUTS From 3006b00675f01a072a9b90334793ca9a6290a904 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 2 Oct 2024 09:11:46 -0600 Subject: [PATCH 02/10] P3371R1: Implement hermitian_matrix_rank_1_update Change existing hermitian_matrix_rank_1_update overloads to be overwriting instead of unconditionally updating, and add updating hermitian_matrix_rank_1_update overloads. Guard changes with the appropriate macro, which by default is NOT defined. Add lots of tests to ensure coverage of all cases. Tests pass whether or not P3371 support is enabled. --- .../blas2_matrix_rank_1_update.hpp | 526 ++++++++++++++++-- tests/native/her.cpp | 471 +++++++++++++--- tests/native/syr.cpp | 4 + 3 files changed, 876 insertions(+), 125 deletions(-) diff --git a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp index 1a1fc446..9299aa66 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp @@ -48,7 +48,7 @@ namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { inline namespace __p1673_version_0 { namespace linalg { -namespace { +namespace { // (anonymous) template struct is_custom_matrix_rank_1_update_avail : std::false_type {}; @@ -114,50 +114,130 @@ struct is_custom_symmetric_matrix_rank_1_update_avail< : std::true_type {}; +#if defined(LINALG_FIX_RANK_UPDATES) + +// For the overwriting case, use E_t = void. +// For the no-alpha case, use ScaleFactorType = void. +template +struct is_custom_hermitian_matrix_rank_1_update_avail : std::false_type +{}; + +// Overwriting, ExecutionPolicy != inline_exec_t, no alpha +template +struct is_custom_hermitian_matrix_rank_1_update_avail< + Exec, /* ScaleFactorType = */ void, x_t, /* E_t = */ void, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Updating, ExecutionPolicy != inline_exec_t, no alpha +template +struct is_custom_hermitian_matrix_rank_1_update_avail< + Exec, /* ScaleFactorType = */ void, x_t, E_t, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Overwriting, ExecutionPolicy != inline_exec_t, alpha +template +struct is_custom_hermitian_matrix_rank_1_update_avail< + Exec, ScaleFactorType, x_t, /* E_t = */ void, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Updating, ExecutionPolicy != inline_exec_t, alpha +template +struct is_custom_hermitian_matrix_rank_1_update_avail< + Exec, ScaleFactorType, x_t, E_t, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +#else + template struct is_custom_hermitian_matrix_rank_1_update_avail : std::false_type {}; +// ExecutionPolicy != inline_exec_t, no alpha template struct is_custom_hermitian_matrix_rank_1_update_avail< - Exec, void, x_t, A_t, Tr_t, + Exec, /* ScaleFactorType = */ void, x_t, A_t, Tr_t, std::enable_if_t< std::is_void_v< - decltype(hermitian_matrix_rank_1_update - (std::declval(), - std::declval(), - std::declval(), - std::declval() - ) - ) - > - && ! impl::is_inline_exec_v - > + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v > - : std::true_type +> : std::true_type {}; +// ExecutionPolicy != inline_exec_t, alpha template struct is_custom_hermitian_matrix_rank_1_update_avail< Exec, ScaleFactorType, x_t, A_t, Tr_t, std::enable_if_t< std::is_void_v< - decltype(hermitian_matrix_rank_1_update - (std::declval(), - std::declval(), - std::declval(), - std::declval(), - std::declval() - ) - ) - > - && ! impl::is_inline_exec_v + decltype(hermitian_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) > + && ! impl::is_inline_exec_v > - : std::true_type +> : std::true_type {}; -} // end anonymous namespace +#endif + +} // namespace (anonymous) // Nonsymmetric non-conjugated rank-1 update @@ -502,10 +582,12 @@ void symmetric_matrix_rank_1_update( symmetric_matrix_rank_1_update(impl::default_exec_t{}, x, A, t); } -// Rank-k update of a Hermitian matrix +// Hermitian matrix rank-k update -// Rank-1 update of a Hermitian matrix with scaling factor alpha +// Hermitian matrix rank-1 update +// Overwriting Hermitian rank-1 matrix update +// (inline_exec_t, alpha) MDSPAN_TEMPLATE_REQUIRES( class ScaleFactorType, class ElementType_x, @@ -515,7 +597,7 @@ MDSPAN_TEMPLATE_REQUIRES( class ElementType_A, class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, - class Layout_A, + class Layout_A, // TODO Implement Constraints and Mandates class Accessor_A, class Triangle, /* requires */ ( @@ -530,26 +612,102 @@ void hermitian_matrix_rank_1_update( mdspan, Layout_A, Accessor_A> A, Triangle /* t */) { - using size_type = std::common_type_t; + using index_type = std::common_type_t; if constexpr (std::is_same_v) { - for (size_type j = 0; j < A.extent(1); ++j) { - A(j,j) = impl::real_if_needed(A(j,j)); - for (size_type i = j; i < A.extent(0); ++i) { + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = alpha * x(i) * impl::conj_if_needed(x(j)); +#else + auto A_ij = i == j ? impl::real_if_needed(A(i,j)) : A(i,j); + A(i,j) = A_ij + alpha * x(i) * impl::conj_if_needed(x(j)); +#endif // LINALG_FIX_RANK_UPDATES + } + } + } + else { // upper triangle + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = typename decltype(A)::value_type{}; +#else + if (i == j) { + A(j,j) = impl::real_if_needed(A(j,j)); + } +#endif // LINALG_FIX_RANK_UPDATES + A(i,j) += alpha * x(i) * impl::conj_if_needed(x(j)); + } + } + } +} + +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (inline_exec_t, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void hermitian_matrix_rank_1_update( + impl::inline_exec_t&& /* exec */, + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle /* t */) +{ + using index_type = std::common_type_t; + + if constexpr (std::is_same_v) { + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = j; i < A.extent(0); ++i) { + if (i == j) { + A(j,j) = impl::real_if_needed(E(j,j)); + } + else { + A(i,j) = E(i,j); + } A(i,j) += alpha * x(i) * impl::conj_if_needed(x(j)); } } } else { - for (size_type j = 0; j < A.extent(1); ++j) { - A(j,j) = impl::real_if_needed(A(j,j)); - for (size_type i = 0; i <= j; ++i) { + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { + if (i == j) { + A(j,j) = impl::real_if_needed(E(j,j)); + } + else { + A(i,j) = E(i,j); + } A(i,j) += alpha * x(i) * impl::conj_if_needed(x(j)); } } } } +#endif // LINALG_FIX_RANK_UPDATES +// Overwriting Hermitian rank-1 matrix update +// (ExecutionPolicy&&, alpha) MDSPAN_TEMPLATE_REQUIRES( class ExecutionPolicy, class ScaleFactorType, @@ -560,7 +718,7 @@ MDSPAN_TEMPLATE_REQUIRES( class ElementType_A, class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, - class Layout_A, + class Layout_A, // TODO Implement Constraints and Mandates class Accessor_A, class Triangle, /* requires */ ( @@ -577,7 +735,14 @@ void hermitian_matrix_rank_1_update( Triangle t) { constexpr bool use_custom = is_custom_hermitian_matrix_rank_1_update_avail< - decltype(execpolicy_mapper(exec)), ScaleFactorType, decltype(x), decltype(A), Triangle + decltype(execpolicy_mapper(exec)), + ScaleFactorType, + decltype(x), +#if defined(LINALG_FIX_RANK_UPDATES) + /* decltype(E) = */ void, +#endif // LINALG_FIX_RANK_UPDATES + decltype(A), + Triangle >::value; if constexpr (use_custom) { @@ -588,16 +753,74 @@ void hermitian_matrix_rank_1_update( } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (ExecutionPolicy&&, alpha) MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, class ScaleFactorType, class ElementType_x, class SizeType_x, ::std::size_t ext_x, class Layout_x, class Accessor_x, + class ElementType_E, + // Work-around for MDSPAN_TEMPLATE_REQUIRES not taking too many arguments: + // combine (SizeType_E, numRows_E, numCols_E) into Extents_E + // and add rank() == 2 constraint. + class Extents_E, + class Layout_E, + class Accessor_E, class ElementType_A, class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, - class Layout_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_1_update( + ExecutionPolicy&& exec, + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + constexpr bool use_custom = is_custom_hermitian_matrix_rank_1_update_avail< + decltype(execpolicy_mapper(exec)), + ScaleFactorType, + decltype(x), + decltype(E), + decltype(A), + Triangle + >::value; + + if constexpr (use_custom) { + hermitian_matrix_rank_1_update(execpolicy_mapper(exec), alpha, x, E, A, t); + } + else { + hermitian_matrix_rank_1_update(impl::inline_exec_t{}, alpha, x, E, A, t); + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting Hermitian rank-1 matrix update +// (no execution policy, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates class Accessor_A, class Triangle, /* requires */ ( @@ -615,8 +838,45 @@ void hermitian_matrix_rank_1_update( hermitian_matrix_rank_1_update(impl::default_exec_t{}, alpha, x, A, t); } -// Rank-1 update of a Hermitian matrix without scaling factor alpha +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (no execution policy, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + (! impl::is_linalg_execution_policy_other_than_inline_v) && + (std::is_same_v || + std::is_same_v) + ) +) +void hermitian_matrix_rank_1_update( + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + hermitian_matrix_rank_1_update(impl::default_exec_t{}, alpha, x, E, A, t); +} +#endif // LINALG_FIX_RANK_UPDATES +// Overwriting Hermitian rank-1 matrix update +// (inline_exec_t, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ElementType_x, class SizeType_x, ::std::size_t ext_x, @@ -639,26 +899,100 @@ void hermitian_matrix_rank_1_update( mdspan, Layout_A, Accessor_A> A, Triangle /* t */) { - using size_type = std::common_type_t; + using index_type = std::common_type_t; if constexpr (std::is_same_v) { - for (size_type j = 0; j < A.extent(1); ++j) { - A(j,j) = impl::real_if_needed(A(j,j)); - for (size_type i = j; i < A.extent(0); ++i) { - A(i,j) += x(i) * impl::conj_if_needed(x(j)); + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = x(i) * impl::conj_if_needed(x(j)); +#else + auto A_ij = i == j ? impl::real_if_needed(A(i,j)) : A(i,j); + A(i,j) = A_ij + x(i) * impl::conj_if_needed(x(j)); +#endif // LINALG_FIX_RANK_UPDATES } } } - else { - for (size_type j = 0; j < A.extent(1); ++j) { - A(j,j) = impl::real_if_needed(A(j,j)); - for (size_type i = 0; i <= j; ++i) { + else { // upper triangle + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = typename decltype(A)::value_type{}; +#else + if (i == j) { + A(j,j) = impl::real_if_needed(A(j,j)); + } +#endif // LINALG_FIX_RANK_UPDATES A(i,j) += x(i) * impl::conj_if_needed(x(j)); } } } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (inline_exec_t, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void hermitian_matrix_rank_1_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle /* t */) +{ + using index_type = std::common_type_t; + + if constexpr (std::is_same_v) { + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { + if (i == j) { + A(i,j) = impl::real_if_needed(E(i,j)) + x(i) * impl::conj_if_needed(x(j)); + } + else { + A(i,j) = E(i,j) + x(i) * impl::conj_if_needed(x(j)); + } + } + } + } + else { // upper triangle + printf("OH HAI MARK\n"); + + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { + if (i == j) { + A(i,j) = impl::real_if_needed(E(i,j)) + x(i) * impl::conj_if_needed(x(j)); + } + else { + A(i,j) = E(i,j) + x(i) * impl::conj_if_needed(x(j)); + } + } + } + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting Hermitian rank-1 matrix update +// (ExecutionPolicy&&, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ExecutionPolicy, class ElementType_x, @@ -684,7 +1018,14 @@ void hermitian_matrix_rank_1_update( Triangle t) { constexpr bool use_custom = is_custom_hermitian_matrix_rank_1_update_avail< - decltype(impl::map_execpolicy_with_check(exec)), void, decltype(x), decltype(A), Triangle + decltype(impl::map_execpolicy_with_check(exec)), + /* ScaleFactorType = */ void, + decltype(x), +#if defined(LINALG_FIX_RANK_UPDATES) + /* decltype(E) = */ void, +#endif // LINALG_FIX_RANK_UPDATES + decltype(A), + Triangle >::value; if constexpr (use_custom) { @@ -695,6 +1036,59 @@ void hermitian_matrix_rank_1_update( } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (ExecutionPolicy&&, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) + ) +) +void hermitian_matrix_rank_1_update( + ExecutionPolicy&& exec, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + constexpr bool use_custom = is_custom_hermitian_matrix_rank_1_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + /* ScaleFactorType = */ void, + decltype(x), + decltype(E), + decltype(A), + Triangle + >::value; + + if constexpr (use_custom) { + hermitian_matrix_rank_1_update(impl::map_execpolicy_with_check(exec), x, E, A, t); + } + else { + hermitian_matrix_rank_1_update(impl::inline_exec_t{}, x, E, A, t); + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting Hermitian rank-1 matrix update +// (no execution policy, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ElementType_x, class SizeType_x, ::std::size_t ext_x, @@ -719,6 +1113,40 @@ void hermitian_matrix_rank_1_update( hermitian_matrix_rank_1_update(impl::default_exec_t{}, x, A, t); } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating Hermitian rank-1 matrix update +// (no execution policy, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void hermitian_matrix_rank_1_update( + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + hermitian_matrix_rank_1_update(impl::default_exec_t{}, x, E, A, t); +} +#endif // LINALG_FIX_RANK_UPDATES + } // end namespace linalg } // end inline namespace __p1673_version_0 } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE diff --git a/tests/native/her.cpp b/tests/native/her.cpp index c1e04953..aee3b320 100644 --- a/tests/native/her.cpp +++ b/tests/native/her.cpp @@ -5,100 +5,419 @@ namespace { using LinearAlgebra::hermitian_matrix_rank_1_update; using LinearAlgebra::upper_triangle; - // Regression test for ambiguous overloads of - // hermitian_matrix_rank_1_update (related to - // https://github.com/kokkos/stdBLAS/issues/261 ). + // A = [-1.0 -2.0 -4.0] + // [-2.0 -3.0 -5.0] + // [-4.0 -5.0 -6.0] // - // The reference implementation needs to implement all constraints - // of hermitian_matrix_rank_1_update in order to disambiguate - // overloads. - TEST(BLAS3_her, AmbiguousOverloads) - { - constexpr auto map_A = layout_right::mapping{extents{}}; - constexpr auto map_expected = map_A; - constexpr auto map_x = layout_right::mapping{extents{}}; + // x = [ 2.0 + 7.0i] + // [ 5.0] + // [ 11.0] + // + // x x^H = [53.0 10.0 + 35.0i 22.0 + 77.0i] + // [10.0 - 35.0i 25.0 55.0 ] + // [22.0 - 77.0i 55.0 121.0 ] + // + // A + x x^H = [52.0 8.0 + 35.0i 18.0 + 77.0i] + // [ 8.0 - 35.0i 22.0 50.0] + // [18.0 - 77.0i 50.0 115.0] + class her_test_problem { + public: using V = std::complex; - // A = [-1.0 -2.0 -4.0] - // [-2.0 -3.0 -5.0] - // [-4.0 -5.0 -6.0] - // - // x = [ 2.0 + 7.0i] - // [ 5.0] - // [ 11.0] - // - // x x^H = [53.0 10.0 + 35.0i 22.0 + 77.0i] - // [10.0 - 35.0i 25.0 55.0 ] - // [22.0 - 77.0i 55.0 121.0 ] - // - // A + x x^H = [52.0 8.0 + 35.0i 18.0 + 77.0i] - // [ 8.0 - 35.0i 22.0 50.0] - // [18.0 - 77.0i 50.0 115.0] - constexpr std::array A_storage_original{ - V(-1.0, 0.0), V(-2.0, 0.0), V(-4.0, 0.0), - V(-2.0, 0.0), V(-3.0, 0.0), V(-5.0, 0.0), - V(-4.0, 0.0), V(-5.0, 0.0), V(-6.0, 0.0) + std::vector A_original{ + V(-1.0, 0.0), V(-2.0, 0.0), V(-4.0, 0.0), + V(-2.0, 0.0), V(-3.0, 0.0), V(-5.0, 0.0), + V(-4.0, 0.0), V(-5.0, 0.0), V(-6.0, 0.0) }; - constexpr std::array x_storage_original{ + + std::vector A{ + V(-1.0, 0.0), V(-2.0, 0.0), V(-4.0, 0.0), + V(-2.0, 0.0), V(-3.0, 0.0), V(-5.0, 0.0), + V(-4.0, 0.0), V(-5.0, 0.0), V(-6.0, 0.0) + }; + + std::vector x{ V( 2.0, 7.0), V( 5.0, 0.0), V(11.0, 0.0) }; - constexpr std::array expected_storage_original{ - V(52.0, 0.0), V( 8.0, 35.0), V( 18.0, 77.0), - V( 8.0, -35.0), V(22.0, 0.0), V( 50.0, 0.0), - V(18.0, -77.0), V(50.0, 0.0), V(115.0, 0.0) + + std::vector A_plus_x_xH = { + V(52.0, 0.0), V( 8.0, 35.0), V( 18.0, 77.0), + V( 8.0, -35.0), V(22.0, 0.0), V( 50.0, 0.0), + V(18.0, -77.0), V(50.0, 0.0), V(115.0, 0.0) + }; + +#if defined(LINALG_FIX_RANK_UPDATES) + std::vector x_xH = { + V(53.0, 0.0), V(10.0, 35.0), V( 22.0, 77.0), + V(10.0, -35.0), V(25.0, 0.0), V( 55.0, 0.0), + V(22.0, -77.0), V(55.0, 0.0), V(121.0, 0.0) + }; + + std::vector two_x_xH = { + V(106.0, 0.0), V( 20.0, 70.0), V( 44.0, 154.0), + V( 20.0, -70.0), V( 50.0, 0.0), V(110.0, 0.0), + V( 44.0, -154.0), V(110.0, 0.0), V(242.0, 0.0) }; - auto A_storage = A_storage_original; - mdspan A{A_storage.data(), map_A}; + std::vector A_plus_two_x_xH = { + V(105.0, 0.0), V( 18.0, 70.0), V( 40.0, 154.0), + V( 18.0, -70.0), V( 47.0, 0.0), V(105.0, 0.0), + V( 40.0, -154.0), V(105.0, 0.0), V(236.0, 0.0) + }; +#endif // LINALG_FIX_RANK_UPDATES + + void refresh() { + A = std::vector{ + V(-1.0, 0.0), V(-2.0, 0.0), V(-4.0, 0.0), + V(-2.0, 0.0), V(-3.0, 0.0), V(-5.0, 0.0), + V(-4.0, 0.0), V(-5.0, 0.0), V(-6.0, 0.0) + }; + } + + using A_type = mdspan>; + using const_A_type = mdspan>; + using x_type = mdspan>; + using result_type = mdspan>; + + A_type A_view() { + return A_type{A.data()}; + } + + const_A_type A_original_view() const { + return const_A_type{A_original.data()}; + } - auto expected_storage = expected_storage_original; - mdspan expected{expected_storage.data(), map_expected}; + x_type x_view() const { + return x_type{x.data()}; + } - auto x_storage = x_storage_original; - mdspan x{x_storage.data(), map_x}; + result_type A_plus_x_xH_view() const { + return result_type{A_plus_x_xH.data()}; + } - auto check_upper_triangle = [&] () { - for (std::size_t row = 0; row < A.extent(0); ++row) { - for (std::size_t col = row; col < A.extent(1); ++col) { - const auto expected_rc = expected(row, col); - const auto A_rc = A(row, col); - EXPECT_EQ(expected_rc, A_rc) << "at (" << row << ", " << col << ")"; +#if defined(LINALG_FIX_RANK_UPDATES) + result_type x_xH_view() const { + return result_type{x_xH.data()}; + } + + result_type two_x_xH_view() const { + return result_type{two_x_xH.data()}; + } + + result_type A_plus_two_x_xH_view() const { + return result_type{A_plus_two_x_xH.data()}; + } +#endif // LINALG_FIX_RANK_UPDATES + }; + + // This also serves as a regression test for ambiguous overloads of + // hermitian_matrix_rank_1_update (related to + // https://github.com/kokkos/stdBLAS/issues/261 ). + // + // The reference implementation needs to implement all constraints + // of hermitian_matrix_rank_1_update in order to disambiguate + // overloads. + TEST(BLAS3_her, upper_triangle) + { + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xH = problem.A_plus_x_xH_view(); + const char what[] = " triangle of A = A + 1.0 x x^H (upper)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + hermitian_matrix_rank_1_update(1.0, x, A, A, upper_triangle); +#else + hermitian_matrix_rank_1_update(1.0, x, A, upper_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; } } - }; - auto check_lower_triangle = [&] () { - for (std::size_t row = 0; row < A.extent(0); ++row) { - for (std::size_t col = 0; col <= row; ++col) { - const auto expected_rc = expected(row, col); - const auto A_rc = A(row, col); - EXPECT_EQ(expected_rc, A_rc) << "at (" << row << ", " << col << ")"; + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xH = problem.A_plus_x_xH_view(); + const char what[] = " triangle of A = A + x x^H (upper)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + hermitian_matrix_rank_1_update(x, A, A, upper_triangle); +#else + hermitian_matrix_rank_1_update(x, A, upper_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; } } - }; + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xH = problem.x_xH_view(); + const char what[] = " triangle of A = 1.0 x x^H (upper)"; - hermitian_matrix_rank_1_update(1.0, x, A, upper_triangle); - check_upper_triangle(); - - // Reset values, just in case some bug might have overwritten them. - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - hermitian_matrix_rank_1_update(1.0, x, A, lower_triangle); - check_lower_triangle(); - - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - hermitian_matrix_rank_1_update(x, A, upper_triangle); - check_upper_triangle(); - - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - hermitian_matrix_rank_1_update(x, A, lower_triangle); - check_lower_triangle(); + hermitian_matrix_rank_1_update(1.0, x, A, upper_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xH = problem.A_plus_x_xH_view(); + const char what[] = " triangle of A = 2.0 x x^H (upper)"; + + hermitian_matrix_rank_1_update(2.0, x, A, upper_triangle); + mdspan two_x_xH = problem.two_x_xH_view(); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), two_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_two_x_xH = problem.A_plus_two_x_xH_view(); + const char what[] = " triangle of A = A + 2.0 x x^H (upper)"; + + hermitian_matrix_rank_1_update(2.0, x, A, A, upper_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES } + TEST(BLAS3_her, lower_triangle) + { + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xH = problem.A_plus_x_xH_view(); + const char what[] = " triangle of A = A + 1.0 x x^H (lower)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + hermitian_matrix_rank_1_update(1.0, x, A, A, lower_triangle); +#else + hermitian_matrix_rank_1_update(1.0, x, A, lower_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xH = problem.A_plus_x_xH_view(); + const char what[] = " triangle of A = A + x x^H (lower)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + hermitian_matrix_rank_1_update(x, A, A, lower_triangle); +#else + hermitian_matrix_rank_1_update(x, A, lower_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xH = problem.x_xH_view(); + const char what[] = " triangle of A = x x^H (lower)"; + + hermitian_matrix_rank_1_update(x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xH = problem.x_xH_view(); + const char what[] = " triangle of A = 1.0 x x^H (lower)"; + + hermitian_matrix_rank_1_update(1.0, x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan two_x_xH = problem.two_x_xH_view(); + const char what[] = " triangle of A = 2.0 x x^H (lower)"; + + hermitian_matrix_rank_1_update(2.0, x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), two_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + her_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_two_x_xH = problem.A_plus_two_x_xH_view(); + const char what[] = " triangle of A = A + 2.0 x x^H (lower)"; + + hermitian_matrix_rank_1_update(2.0, x, A, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_xH(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES + } } // end anonymous namespace diff --git a/tests/native/syr.cpp b/tests/native/syr.cpp index 3daedecf..e8e82381 100644 --- a/tests/native/syr.cpp +++ b/tests/native/syr.cpp @@ -33,6 +33,10 @@ namespace { // A + x x^T = [3.0 8.0 18.0] // [8.0 22.0 50.0] // [18.0 50.0 115.0] + // + // A + 2.0 x x^T = [3.0 8.0 18.0] + // [8.0 22.0 50.0] + // [18.0 50.0 115.0] constexpr std::array A_storage_original{ -1.0, -2.0, -4.0, -2.0, -3.0, -5.0, From b287f2995c2ed76aad083955fb63f557cd0e26a2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 3 Oct 2024 14:29:13 -0600 Subject: [PATCH 03/10] P3371R1: Implement symmetric_matrix_rank_1_update Change existing symmetric_matrix_rank_1_update overloads to be overwriting instead of unconditionally updating, and add updating symmetric_matrix_rank_1_update overloads. Guard changes with the appropriate macro, which by default is NOT defined. Add lots of tests to ensure coverage of all cases. Tests pass whether or not P3371 support is enabled. --- .../blas2_matrix_rank_1_update.hpp | 439 +++++++++++++++- tests/native/her.cpp | 8 - tests/native/syr.cpp | 478 +++++++++++++++--- 3 files changed, 815 insertions(+), 110 deletions(-) diff --git a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp index 9299aa66..328bea5a 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp @@ -71,10 +71,93 @@ struct is_custom_matrix_rank_1_update_avail< > : std::true_type{}; +#if defined(LINALG_FIX_RANK_UPDATES) + +// For the overwriting case, use E_t = void. +// For the no-alpha case, use ScaleFactorType = void. +template +struct is_custom_symmetric_matrix_rank_1_update_avail : std::false_type +{}; + +// Overwriting, ExecutionPolicy != inline_exec_t, no alpha +template +struct is_custom_symmetric_matrix_rank_1_update_avail< + Exec, /* ScaleFactorType = */ void, x_t, /* E_t = */ void, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Updating, ExecutionPolicy != inline_exec_t, no alpha +template +struct is_custom_symmetric_matrix_rank_1_update_avail< + Exec, /* ScaleFactorType = */ void, x_t, E_t, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Overwriting, ExecutionPolicy != inline_exec_t, alpha +template +struct is_custom_symmetric_matrix_rank_1_update_avail< + Exec, ScaleFactorType, x_t, /* E_t = */ void, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_1_update( + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Updating, ExecutionPolicy != inline_exec_t, alpha +template +struct is_custom_symmetric_matrix_rank_1_update_avail< + Exec, ScaleFactorType, x_t, E_t, A_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_1_update( + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval(), // implies not void + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +#else + template struct is_custom_symmetric_matrix_rank_1_update_avail : std::false_type {}; +// ExecutionPolicy != inline_exec_t, no alpha template struct is_custom_symmetric_matrix_rank_1_update_avail< Exec, void, x_t, A_t, Tr_t, @@ -94,6 +177,7 @@ struct is_custom_symmetric_matrix_rank_1_update_avail< : std::true_type {}; +// ExecutionPolicy != inline_exec_t, alpha template struct is_custom_symmetric_matrix_rank_1_update_avail< Exec, ScaleFactorType, x_t, A_t, Tr_t, @@ -114,6 +198,8 @@ struct is_custom_symmetric_matrix_rank_1_update_avail< : std::true_type {}; +#endif // LINALG_FIX_RANK_UPDATES + #if defined(LINALG_FIX_RANK_UPDATES) // For the overwriting case, use E_t = void. @@ -369,10 +455,10 @@ void matrix_rank_1_update_c( matrix_rank_1_update(exec, x, conjugated(y), A); } -// Rank-1 update of a symmetric matrix - -// Rank-1 update of a symmetric matrix with scaling factor alpha +// Symmetric matrix rank-1 update +// Overwriting symmetric rank-1 matrix update +// (inline_exec_t, alpha) MDSPAN_TEMPLATE_REQUIRES( class ScaleFactorType, class ElementType_x, @@ -397,24 +483,84 @@ void symmetric_matrix_rank_1_update( mdspan, Layout_A, Accessor_A> A, Triangle /* t */) { - using size_type = std::common_type_t; + using index_type = std::common_type_t; if constexpr (std::is_same_v) { - for (size_type j = 0; j < A.extent(1); ++j) { - for (size_type i = j; i < A.extent(0); ++i) { + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = typename decltype(A)::value_type{}; +#endif // LINALG_FIX_RANK_UPDATES A(i,j) += alpha * x(i) * x(j); } } } - else { - for (size_type j = 0; j < A.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { + else { // upper triangle + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = typename decltype(A)::value_type{}; +#endif // LINALG_FIX_RANK_UPDATES A(i,j) += alpha * x(i) * x(j); } } } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (inline_exec_t, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void symmetric_matrix_rank_1_update( + impl::inline_exec_t&& /* exec */, + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle /* t */) +{ + using index_type = std::common_type_t; + + if constexpr (std::is_same_v) { + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = j; i < A.extent(0); ++i) { + A(i,j) = E(i,j) + alpha * x(i) * x(j); + } + } + } + else { + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { + A(i,j) = E(i,j) + alpha * x(i) * x(j); + } + } + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting symmetric rank-1 matrix update +// (ExecutionPolicy&&, alpha) MDSPAN_TEMPLATE_REQUIRES( class ExecutionPolicy, class ScaleFactorType, @@ -442,7 +588,14 @@ void symmetric_matrix_rank_1_update( Triangle t) { constexpr bool use_custom = is_custom_symmetric_matrix_rank_1_update_avail< - decltype(execpolicy_mapper(exec)), ScaleFactorType, decltype(x), decltype(A), Triangle + decltype(execpolicy_mapper(exec)), + ScaleFactorType, + decltype(x), +#if defined(LINALG_FIX_RANK_UPDATES) + /* decltype(E) = */ void, +#endif // LINALG_FIX_RANK_UPDATES + decltype(A), + Triangle >::value; if constexpr (use_custom) { @@ -453,6 +606,64 @@ void symmetric_matrix_rank_1_update( } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (ExecutionPolicy&&, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + // Work-around for MDSPAN_TEMPLATE_REQUIRES not taking too many arguments: + // combine (SizeType_E, numRows_E, numCols_E) into Extents_E + // and add rank() == 2 constraint. + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void symmetric_matrix_rank_1_update( + ExecutionPolicy&& exec, + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + constexpr bool use_custom = is_custom_symmetric_matrix_rank_1_update_avail< + decltype(execpolicy_mapper(exec)), + ScaleFactorType, + decltype(x), + decltype(E), + decltype(A), + Triangle + >::value; + + if constexpr (use_custom) { + symmetric_matrix_rank_1_update(execpolicy_mapper(exec), alpha, x, E, A, t); + } + else { + symmetric_matrix_rank_1_update(impl::inline_exec_t{}, alpha, x, E, A, t); + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting symmetric rank-1 matrix update +// (no execution policy, alpha) MDSPAN_TEMPLATE_REQUIRES( class ScaleFactorType, class ElementType_x, @@ -480,8 +691,45 @@ void symmetric_matrix_rank_1_update( symmetric_matrix_rank_1_update(impl::default_exec_t{}, alpha, x, A, t); } -// Rank-1 update of a symmetric matrix without scaling factor alpha +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (no execution policy, alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, // TODO Implement Constraints and Mandates + class Accessor_A, + class Triangle, + /* requires */ ( + (! impl::is_linalg_execution_policy_other_than_inline_v) && + (std::is_same_v || + std::is_same_v) + ) +) +void symmetric_matrix_rank_1_update( + ScaleFactorType alpha, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + symmetric_matrix_rank_1_update(impl::default_exec_t{}, alpha, x, E, A, t); +} +#endif // LINALG_FIX_RANK_UPDATES +// Overwriting symmetric rank-1 matrix update +// (inline_exec_t, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ElementType_x, class SizeType_x, ::std::size_t ext_x, @@ -504,24 +752,83 @@ void symmetric_matrix_rank_1_update( mdspan, Layout_A, Accessor_A> A, Triangle /* t */) { - using size_type = std::common_type_t; + using index_type = std::common_type_t; if constexpr (std::is_same_v) { - for (size_type j = 0; j < A.extent(1); ++j) { - for (size_type i = j; i < A.extent(0); ++i) { + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = x(i) * x(j); +#else A(i,j) += x(i) * x(j); +#endif // LINALG_FIX_RANK_UPDATES } } } - else { - for (size_type j = 0; j < A.extent(1); ++j) { - for (size_type i = 0; i <= j; ++i) { + else { // upper triangle + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) + A(i,j) = typename decltype(A)::value_type{}; +#endif // LINALG_FIX_RANK_UPDATES A(i,j) += x(i) * x(j); } } } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (inline_exec_t, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void symmetric_matrix_rank_1_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle /* t */) +{ + using index_type = std::common_type_t; + + if constexpr (std::is_same_v) { + for (index_type i = 0; i < A.extent(0); ++i) { + for (index_type j = 0; j <= i; ++j) { + A(i,j) = E(i,j) + x(i) * x(j); + } + } + } + else { // upper triangle + for (index_type j = 0; j < A.extent(1); ++j) { + for (index_type i = 0; i <= j; ++i) { + A(i,j) = E(i,j) + x(i) * x(j); + } + } + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting symmetric rank-1 matrix update +// (ExecutionPolicy&&, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ExecutionPolicy, class ElementType_x, @@ -547,7 +854,14 @@ void symmetric_matrix_rank_1_update( Triangle t) { constexpr bool use_custom = is_custom_symmetric_matrix_rank_1_update_avail< - decltype(impl::map_execpolicy_with_check(exec)), void, decltype(x), decltype(A), Triangle + decltype(impl::map_execpolicy_with_check(exec)), + /* ScaleFactorType = */ void, + decltype(x), +#if defined(LINALG_FIX_RANK_UPDATES) + /* decltype(E) = */ void, +#endif // LINALG_FIX_RANK_UPDATES + decltype(A), + Triangle >::value; if constexpr (use_custom) { @@ -558,6 +872,59 @@ void symmetric_matrix_rank_1_update( } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (ExecutionPolicy&&, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) + ) +) +void symmetric_matrix_rank_1_update( + ExecutionPolicy&& exec, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + constexpr bool use_custom = is_custom_symmetric_matrix_rank_1_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + /* ScaleFactorType = */ void, + decltype(x), + decltype(E), + decltype(A), + Triangle + >::value; + + if constexpr (use_custom) { + symmetric_matrix_rank_1_update(impl::map_execpolicy_with_check(exec), x, E, A, t); + } + else { + symmetric_matrix_rank_1_update(impl::inline_exec_t{}, x, E, A, t); + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting symmetric rank-1 matrix update +// (no execution policy, no alpha) MDSPAN_TEMPLATE_REQUIRES( class ElementType_x, class SizeType_x, ::std::size_t ext_x, @@ -582,7 +949,39 @@ void symmetric_matrix_rank_1_update( symmetric_matrix_rank_1_update(impl::default_exec_t{}, x, A, t); } -// Hermitian matrix rank-k update +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating symmetric rank-1 matrix update +// (no execution policy, no alpha) +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_x, + class SizeType_x, ::std::size_t ext_x, + class Layout_x, + class Accessor_x, + class ElementType_E, + class SizeType_E, ::std::size_t numRows_E, + ::std::size_t numCols_E, + class Layout_E, + class Accessor_E, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, + ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void symmetric_matrix_rank_1_update( + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A, + Triangle t) +{ + symmetric_matrix_rank_1_update(impl::default_exec_t{}, x, E, A, t); +} +#endif // LINALG_FIX_RANK_UPDATES // Hermitian matrix rank-1 update @@ -975,8 +1374,6 @@ void hermitian_matrix_rank_1_update( } } else { // upper triangle - printf("OH HAI MARK\n"); - for (index_type j = 0; j < A.extent(1); ++j) { for (index_type i = 0; i <= j; ++i) { if (i == j) { diff --git a/tests/native/her.cpp b/tests/native/her.cpp index aee3b320..19571048 100644 --- a/tests/native/her.cpp +++ b/tests/native/her.cpp @@ -68,14 +68,6 @@ namespace { }; #endif // LINALG_FIX_RANK_UPDATES - void refresh() { - A = std::vector{ - V(-1.0, 0.0), V(-2.0, 0.0), V(-4.0, 0.0), - V(-2.0, 0.0), V(-3.0, 0.0), V(-5.0, 0.0), - V(-4.0, 0.0), V(-5.0, 0.0), V(-6.0, 0.0) - }; - } - using A_type = mdspan>; using const_A_type = mdspan>; using x_type = mdspan>; diff --git a/tests/native/syr.cpp b/tests/native/syr.cpp index e8e82381..76f821af 100644 --- a/tests/native/syr.cpp +++ b/tests/native/syr.cpp @@ -5,103 +5,419 @@ namespace { using LinearAlgebra::symmetric_matrix_rank_1_update; using LinearAlgebra::upper_triangle; - // Regression test for ambiguous overloads of + // A = [-1.0 -2.0 -4.0] + // [-2.0 -3.0 -5.0] + // [-4.0 -5.0 -6.0] + // + // x = [ 2.0] + // [ 5.0] + // [ 11.0] + // + // x x^T = [4.0 10.0 22.0] + // [10.0 25.0 55.0] + // [22.0 55.0 121.0] + // + // 2.0 x x^T = [ 8.0 20.0 44.0] + // [20.0 50.0 110.0] + // [44.0 110.0 242.0] + // + // A + x x^T = [3.0 8.0 18.0] + // [8.0 22.0 50.0] + // [18.0 50.0 115.0] + // + // A + 2.0 x x^T = [ 7.0 18.0 40.0] + // [18.0 47.0 105.0] + // [40.0 105.0 236.0] + class syr_test_problem { + public: + using V = double; + + std::vector A_original{ + V(-1.0), V(-2.0), V(-4.0), + V(-2.0), V(-3.0), V(-5.0), + V(-4.0), V(-5.0), V(-6.0) + }; + + std::vector A{ + V(-1.0), V(-2.0), V(-4.0), + V(-2.0), V(-3.0), V(-5.0), + V(-4.0), V(-5.0), V(-6.0) + }; + + std::vector x{ + V( 2.0), + V( 5.0), + V(11.0) + }; + + std::vector A_plus_x_xT = { + V( 3.0), V( 8.0), V( 18.0), + V( 8.0), V(22.0), V( 50.0), + V(18.0), V(50.0), V(115.0) + }; + +#if defined(LINALG_FIX_RANK_UPDATES) + std::vector x_xT = { + V( 4.0), V(10.0), V( 22.0), + V(10.0), V(25.0), V( 55.0), + V(22.0), V(55.0), V(121.0) + }; + + std::vector two_x_xT = { + V( 8.0), V( 20.0), V( 44.0), + V(20.0), V( 50.0), V(110.0), + V(44.0), V(110.0), V(242.0) + }; + + std::vector A_plus_two_x_xT = { + V( 7.0), V( 18.0), V( 40.0), + V(18.0), V( 47.0), V(105.0), + V(40.0), V(105.0), V(236.0) + }; +#endif // LINALG_FIX_RANK_UPDATES + + using A_type = mdspan>; + using const_A_type = mdspan>; + using x_type = mdspan>; + using result_type = mdspan>; + + A_type A_view() { + return A_type{A.data()}; + } + + const_A_type A_original_view() const { + return const_A_type{A_original.data()}; + } + + x_type x_view() const { + return x_type{x.data()}; + } + + result_type A_plus_x_xT_view() const { + return result_type{A_plus_x_xT.data()}; + } + +#if defined(LINALG_FIX_RANK_UPDATES) + result_type x_xT_view() const { + return result_type{x_xT.data()}; + } + + result_type two_x_xT_view() const { + return result_type{two_x_xT.data()}; + } + + result_type A_plus_two_x_xT_view() const { + return result_type{A_plus_two_x_xT.data()}; + } +#endif // LINALG_FIX_RANK_UPDATES + }; + + // This also serves as a regression test for ambiguous overloads of // symmetric_matrix_rank_1_update (related to // https://github.com/kokkos/stdBLAS/issues/261). // // The reference implementation needs to implement all constraints // of symmetric_matrix_rank_1_update in order to disambiguate // overloads. - TEST(BLAS3_syr, AmbiguousOverloads) + TEST(BLAS3_syr, upper_triangle) { - constexpr auto map_A = layout_right::mapping{extents{}}; - constexpr auto map_expected = map_A; - constexpr auto map_x = layout_right::mapping{extents{}}; - - // A = [-1.0 -2.0 -4.0] - // [-2.0 -3.0 -5.0] - // [-4.0 -5.0 -6.0] - // - // x = [ 2.0] - // [ 5.0] - // [ 11.0] - // - // x x^T = [4.0 10.0 22.0] - // [10.0 25.0 55.0] - // [22.0 55.0 121.0] - // - // A + x x^T = [3.0 8.0 18.0] - // [8.0 22.0 50.0] - // [18.0 50.0 115.0] - // - // A + 2.0 x x^T = [3.0 8.0 18.0] - // [8.0 22.0 50.0] - // [18.0 50.0 115.0] - constexpr std::array A_storage_original{ - -1.0, -2.0, -4.0, - -2.0, -3.0, -5.0, - -4.0, -5.0, -6.0 - }; - constexpr std::array x_storage_original{ - 2.0, - 5.0, - 11.0 - }; - constexpr std::array expected_storage_original{ - 3.0, 8.0, 18.0, - 8.0, 22.0, 50.0, - 18.0, 50.0, 115.0 - }; + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xT = problem.A_plus_x_xT_view(); + const char what[] = " triangle of A = A + 1.0 x x^T (upper)"; - auto A_storage = A_storage_original; - mdspan A{A_storage.data(), map_A}; +#if defined(LINALG_FIX_RANK_UPDATES) + symmetric_matrix_rank_1_update(1.0, x, A, A, upper_triangle); +#else + symmetric_matrix_rank_1_update(1.0, x, A, upper_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } - auto expected_storage = expected_storage_original; - mdspan expected{expected_storage.data(), map_expected}; + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xT = problem.A_plus_x_xT_view(); + const char what[] = " triangle of A = A + x x^T (upper)"; - auto x_storage = x_storage_original; - mdspan x{x_storage.data(), map_x}; +#if defined(LINALG_FIX_RANK_UPDATES) + symmetric_matrix_rank_1_update(x, A, A, upper_triangle); +#else + symmetric_matrix_rank_1_update(x, A, upper_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xT = problem.x_xT_view(); + const char what[] = " triangle of A = 1.0 x x^T (upper)"; - auto check_upper_triangle = [&] () { - for (std::size_t row = 0; row < A.extent(0); ++row) { - for (std::size_t col = row; col < A.extent(1); ++col) { - const auto expected_rc = expected(row, col); - const auto A_rc = A(row, col); - EXPECT_EQ(expected_rc, A_rc) << "at (" << row << ", " << col << ")"; + symmetric_matrix_rank_1_update(1.0, x, A, upper_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; } } - }; - auto check_lower_triangle = [&] () { - for (std::size_t row = 0; row < A.extent(0); ++row) { - for (std::size_t col = 0; col <= row; ++col) { - const auto expected_rc = expected(row, col); - const auto A_rc = A(row, col); - EXPECT_EQ(expected_rc, A_rc) << "at (" << row << ", " << col << ")"; + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xT = problem.A_plus_x_xT_view(); + const char what[] = " triangle of A = 2.0 x x^T (upper)"; + + symmetric_matrix_rank_1_update(2.0, x, A, upper_triangle); + mdspan two_x_xT = problem.two_x_xT_view(); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), two_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; } } - }; + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_two_x_xT = problem.A_plus_two_x_xT_view(); + const char what[] = " triangle of A = A + 2.0 x x^T (upper)"; - symmetric_matrix_rank_1_update(1.0, x, A, upper_triangle); - check_upper_triangle(); - - // Reset values, just in case some bug might have overwritten them. - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - symmetric_matrix_rank_1_update(1.0, x, A, lower_triangle); - check_lower_triangle(); - - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - symmetric_matrix_rank_1_update(x, A, upper_triangle); - check_upper_triangle(); - - A_storage = A_storage_original; - expected_storage = expected_storage_original; - x_storage = x_storage_original; - symmetric_matrix_rank_1_update(x, A, lower_triangle); - check_lower_triangle(); + symmetric_matrix_rank_1_update(2.0, x, A, A, upper_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is unchanged. + for (int col = 0; col < row; ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is changed. + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES } + TEST(BLAS3_syr, lower_triangle) + { + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xT = problem.A_plus_x_xT_view(); + const char what[] = " triangle of A = A + 1.0 x x^T (lower)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + symmetric_matrix_rank_1_update(1.0, x, A, A, lower_triangle); +#else + symmetric_matrix_rank_1_update(1.0, x, A, lower_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_x_xT = problem.A_plus_x_xT_view(); + const char what[] = " triangle of A = A + x x^T (lower)"; + +#if defined(LINALG_FIX_RANK_UPDATES) + symmetric_matrix_rank_1_update(x, A, A, lower_triangle); +#else + symmetric_matrix_rank_1_update(x, A, lower_triangle); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xT = problem.x_xT_view(); + const char what[] = " triangle of A = x x^T (lower)"; + + symmetric_matrix_rank_1_update(x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan x_xT = problem.x_xT_view(); + const char what[] = " triangle of A = 1.0 x x^T (lower)"; + + symmetric_matrix_rank_1_update(1.0, x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan two_x_xT = problem.two_x_xT_view(); + const char what[] = " triangle of A = 2.0 x x^T (lower)"; + + symmetric_matrix_rank_1_update(2.0, x, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), two_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } + + { + syr_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_two_x_xT = problem.A_plus_two_x_xT_view(); + const char what[] = " triangle of A = A + 2.0 x x^T (lower)"; + + symmetric_matrix_rank_1_update(2.0, x, A, A, lower_triangle); + for (int row = 0; row < A.extent(0); ++row) { + // Lower triangle is changed. + for (int col = 0; col <= row; ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_xT(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "lower" << what; + } + // Upper triangle is unchanged. + for (int col = row+1; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_original(row, col)) + << "A(" << row << "," << col << ") is wrong for " + << "upper" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES + } } // end anonymous namespace From 28a273cdf1db4abfb577b953aa8e83a91aff4b07 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 3 Oct 2024 15:35:29 -0600 Subject: [PATCH 04/10] P3371: hermitian_matrix_rank_1_update real alpha One could imagine 3 ways to make sure alpha is real: 1. constrain alpha to be a noncomplex number type, 2. impose a precondition that alpha has zero imaginary part, or 3. set alpha to _`real-if-needed`_`(alpha)`. This commit implements Option (3). This has the advantage of working just like how linalg makes the diagonal real. --- .../__p1673_bits/blas2_matrix_rank_1_update.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp index 328bea5a..239d30ae 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp @@ -1013,6 +1013,10 @@ void hermitian_matrix_rank_1_update( { using index_type = std::common_type_t; +#if defined(LINALG_FIX_RANK_UPDATES) + alpha = impl::real_if_needed(alpha); +#endif // LINALG_FIX_RANK_UPDATES + if constexpr (std::is_same_v) { for (index_type i = 0; i < A.extent(0); ++i) { for (index_type j = 0; j <= i; ++j) { @@ -1076,6 +1080,10 @@ void hermitian_matrix_rank_1_update( { using index_type = std::common_type_t; +#if defined(LINALG_FIX_RANK_UPDATES) + alpha = impl::real_if_needed(alpha); +#endif // LINALG_FIX_RANK_UPDATES + if constexpr (std::is_same_v) { for (index_type j = 0; j < A.extent(1); ++j) { for (index_type i = j; i < A.extent(0); ++i) { From a88a6a2293295a05e1c84d2129c70811caaec799 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 3 Oct 2024 16:14:38 -0600 Subject: [PATCH 05/10] P3371: Add tests for matrix_rank_1_update The function wasn't tested before. If the option to enable P3371's changes is enabled, the new tests don't build. --- tests/native/CMakeLists.txt | 1 + tests/native/ger.cpp | 230 ++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 tests/native/ger.cpp diff --git a/tests/native/CMakeLists.txt b/tests/native/CMakeLists.txt index e8275aa2..c6a2a00c 100644 --- a/tests/native/CMakeLists.txt +++ b/tests/native/CMakeLists.txt @@ -23,6 +23,7 @@ linalg_add_test(dot) linalg_add_test(gemm) linalg_add_test(gemv) linalg_add_test(gemv_no_ambig) +linalg_add_test(ger) linalg_add_test(givens) linalg_add_test(hemm) linalg_add_test(herk) diff --git a/tests/native/ger.cpp b/tests/native/ger.cpp new file mode 100644 index 00000000..1f6d5117 --- /dev/null +++ b/tests/native/ger.cpp @@ -0,0 +1,230 @@ +#include "./gtest_fixtures.hpp" + +namespace { + using LinearAlgebra::lower_triangle; + using LinearAlgebra::matrix_rank_1_update; + using LinearAlgebra::upper_triangle; + + // A = [1.0 2.0 3.0] + // [4.0 11.0 8.0] + // [5.0 3.0 7.0] + // + // x = [ 2.0] + // [ 5.0] + // [ 11.0] + // + // y = [ 3.0] + // [ 7.0] + // [ 13.0] + // + // x y^T = [ 6.0 14.0 26.0] + // [15.0 35.0 65.0] + // [33.0 77.0 143.0] + // + // 2.0 x y^T = [12.0 28.0 52.0] + // [30.0 70.0 130.0] + // [66.0 154.0 286.0] + // + // A + x y^T = [ 7.0 16.0 29.0] + // [19.0 46.0 73.0] + // [38.0 80.0 150.0] + // + // A + 2.0 x y^T = [13.0 30.0 55.0] + // [34.0 88.0 138.0] + // [71.0 157.0 293.0] + class ger_test_problem { + public: + using V = double; + + std::vector A_original{ + V(1.0), V( 2.0), V(3.0), + V(4.0), V(11.0), V(8.0), + V(5.0), V( 3.0), V(7.0) + }; + + std::vector A{ + V(1.0), V( 2.0), V(3.0), + V(4.0), V(11.0), V(8.0), + V(5.0), V( 3.0), V(7.0) + }; + + std::vector x{ + V( 2.0), + V( 5.0), + V(11.0) + }; + + std::vector y{ + V( 3.0), + V( 7.0), + V(13.0) + }; + + std::vector A_plus_x_yT = { + V( 7.0), V(16.0), V( 29.0), + V(19.0), V(46.0), V( 73.0), + V(38.0), V(80.0), V(150.0) + }; + +#if defined(LINALG_FIX_RANK_UPDATES) + std::vector x_yT = { + V( 6.0), V(14.0), V( 26.0), + V(15.0), V(35.0), V( 65.0), + V(33.0), V(77.0), V(143.0) + }; + + std::vector two_x_yT = { + V(12.0), V( 28.0), V( 52.0), + V(30.0), V( 70.0), V(130.0), + V(66.0), V(154.0), V(286.0) + }; + + std::vector A_plus_two_x_yT = { + V(13.0), V( 30.0), V( 55.0), + V(34.0), V( 88.0), V(138.0), + V(71.0), V(157.0), V(293.0) + }; +#endif // LINALG_FIX_RANK_UPDATES + + using A_type = mdspan>; + using const_A_type = mdspan>; + using vec_type = mdspan>; + using result_type = mdspan>; + + A_type A_view() { + return A_type{A.data()}; + } + + const_A_type A_original_view() const { + return const_A_type{A_original.data()}; + } + + vec_type x_view() const { + return vec_type{x.data()}; + } + + vec_type y_view() const { + return vec_type{y.data()}; + } + + result_type A_plus_x_yT_view() const { + return result_type{A_plus_x_yT.data()}; + } + +#if defined(LINALG_FIX_RANK_UPDATES) + result_type x_yT_view() const { + return result_type{x_yT.data()}; + } + + result_type two_x_yT_view() const { + return result_type{two_x_yT.data()}; + } + + result_type A_plus_two_x_yT_view() const { + return result_type{A_plus_two_x_yT.data()}; + } +#endif // LINALG_FIX_RANK_UPDATES + }; + + TEST(BLAS3_ger, test0) + { + { + ger_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan A_plus_x_yT = problem.A_plus_x_yT_view(); + const char what[] = " is wrong for A = A + 1.0 x y^T"; + +#if defined(LINALG_FIX_RANK_UPDATES) + matrix_rank_1_update(1.0, x, y, A, A); +#else + matrix_rank_1_update(1.0, x, y, A); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + ger_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan A_plus_x_yT = problem.A_plus_x_yT_view(); + const char what[] = " is wrong for A = A + x y^T"; + +#if defined(LINALG_FIX_RANK_UPDATES) + matrix_rank_1_update(x, y, A, A); +#else + matrix_rank_1_update(x, y, A); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + ger_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan x_yT = problem.x_yT_view(); + const char what[] = " is wrong for A = 1.0 x y^T"; + + matrix_rank_1_update(1.0, x, y, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + ger_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan two_x_yT = problem.two_x_yT_view(); + const char what[] = " is wrong for A = 2.0 x y^T"; + + matrix_rank_1_update(2.0, x, y, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), two_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + ger_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan A_plus_two_x_yT = problem.A_plus_two_x_yT_view(); + const char what[] = " is wrong for A = A + 2.0 x y^T"; + + matrix_rank_1_update(2.0, x, y, A, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES + } +} // end anonymous namespace From f1a78e22f612697e1eaebdbc210eb1f6ead88c22 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 3 Oct 2024 17:19:27 -0600 Subject: [PATCH 06/10] P3371R1: Implement matrix_rank_1_update Change existing matrix_rank_1_update overloads to be overwriting instead of unconditionally updating, and add updating matrix_rank_1_update overloads. Guard changes with the appropriate macro, which by default is NOT defined. Add lots of tests to ensure coverage of all cases. Tests pass whether or not P3371 support is enabled. --- .../blas2_matrix_rank_1_update.hpp | 182 +++++++++++++++++- tests/native/ger.cpp | 22 ++- 2 files changed, 191 insertions(+), 13 deletions(-) diff --git a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp index 239d30ae..c81b975e 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp @@ -50,9 +50,53 @@ namespace linalg { namespace { // (anonymous) +#if defined(LINALG_FIX_RANK_UPDATES) + +// For the overwriting case, use E_t = void. +template +struct is_custom_matrix_rank_1_update_avail : std::false_type {}; + +// Overwriting, ExecutionPolicy != inline_exec_t +template +struct is_custom_matrix_rank_1_update_avail< + Exec, x_t, y_t, /* E_t = */ void, A_t, + std::enable_if_t< + std::is_void_v< + decltype(matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +// Updating, ExecutionPolicy != inline_exec_t +template +struct is_custom_matrix_rank_1_update_avail< + Exec, x_t, y_t, E_t, A_t, + std::enable_if_t< + std::is_void_v< + decltype(matrix_rank_1_update( + std::declval(), + std::declval(), + std::declval(), + std::declval(), // implies not void + std::declval())) + > && + ! impl::is_inline_exec_v + > +> : std::true_type +{}; + +#else + template struct is_custom_matrix_rank_1_update_avail : std::false_type {}; +// ExecutionPolicy != inline_exec_t template struct is_custom_matrix_rank_1_update_avail< Exec, x_t, y_t, A_t, @@ -71,6 +115,8 @@ struct is_custom_matrix_rank_1_update_avail< > : std::true_type{}; +#endif // LINALG_FIX_RANK_UPDATES + #if defined(LINALG_FIX_RANK_UPDATES) // For the overwriting case, use E_t = void. @@ -177,7 +223,7 @@ struct is_custom_symmetric_matrix_rank_1_update_avail< : std::true_type {}; -// ExecutionPolicy != inline_exec_t, alpha +// ExecutionPolicy != inline_exec_t, alpha template struct is_custom_symmetric_matrix_rank_1_update_avail< Exec, ScaleFactorType, x_t, A_t, Tr_t, @@ -325,8 +371,10 @@ struct is_custom_hermitian_matrix_rank_1_update_avail< } // namespace (anonymous) -// Nonsymmetric non-conjugated rank-1 update +// Nonsymmetric nonconjugated matrix rank-1 update +// Overwriting nonsymmetric nonconjugated matrix rank-1 update +// (inline_exec_t) template +void matrix_rank_1_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A) +{ + using size_type = ::std::common_type_t; + + for (size_type i = 0; i < A.extent(0); ++i) { + for (size_type j = 0; j < A.extent(1); ++j) { + A(i,j) = E(i,j) + x(i) * y(j); + } + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting nonsymmetric nonconjugated matrix rank-1 update +// (ExecutionPolicy&&) template, Layout_A, Accessor_A> A) { constexpr bool use_custom = is_custom_matrix_rank_1_update_avail< - decltype(impl::map_execpolicy_with_check(exec)), decltype(x), decltype(y), decltype(A) + decltype(impl::map_execpolicy_with_check(exec)), + decltype(x), + decltype(y), +#if defined(LINALG_FIX_RANK_UPDATES) + /* decltype(E) = */ void, +#endif // LINALG_FIX_RANK_UPDATES + decltype(A) >::value; if constexpr (use_custom) { @@ -387,6 +485,54 @@ void matrix_rank_1_update( } } +#if defined(LINALG_FIX_RANK_UPDATES) +// Updating nonsymmetric nonconjugated matrix rank-1 update +// (ExecutionPolicy&&) +template +void matrix_rank_1_update( + ExecutionPolicy&& exec, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A) +{ + constexpr bool use_custom = is_custom_matrix_rank_1_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + decltype(x), + decltype(y), + decltype(E), + decltype(A) + >::value; + + if constexpr (use_custom) { + matrix_rank_1_update(impl::map_execpolicy_with_check(exec), x, y, E, A); + } + else { + matrix_rank_1_update(impl::inline_exec_t{}, x, y, E, A); + } +} +#endif // LINALG_FIX_RANK_UPDATES + +// Overwriting nonsymmetric nonconjugated rank-1 matrix update +// (no execution policy) template +void matrix_rank_1_update( + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A) +{ + matrix_rank_1_update(impl::default_exec_t{}, x, y, E, A); +} +#endif // LINALG_FIX_RANK_UPDATES // Nonsymmetric conjugated rank-1 update diff --git a/tests/native/ger.cpp b/tests/native/ger.cpp index 1f6d5117..57650f60 100644 --- a/tests/native/ger.cpp +++ b/tests/native/ger.cpp @@ -3,6 +3,7 @@ namespace { using LinearAlgebra::lower_triangle; using LinearAlgebra::matrix_rank_1_update; + using LinearAlgebra::scaled; using LinearAlgebra::upper_triangle; // A = [1.0 2.0 3.0] @@ -81,7 +82,7 @@ namespace { std::vector A_plus_two_x_yT = { V(13.0), V( 30.0), V( 55.0), - V(34.0), V( 88.0), V(138.0), + V(34.0), V( 81.0), V(138.0), V(71.0), V(157.0), V(293.0) }; #endif // LINALG_FIX_RANK_UPDATES @@ -135,12 +136,12 @@ namespace { mdspan x = problem.x_view(); mdspan y = problem.y_view(); mdspan A_plus_x_yT = problem.A_plus_x_yT_view(); - const char what[] = " is wrong for A = A + 1.0 x y^T"; + const char what[] = " is wrong for A = A + (1.0 x) y^T"; #if defined(LINALG_FIX_RANK_UPDATES) - matrix_rank_1_update(1.0, x, y, A, A); + matrix_rank_1_update(scaled(1.0, x), y, A, A); #else - matrix_rank_1_update(1.0, x, y, A); + matrix_rank_1_update(scaled(1.0, x), y, A); #endif // LINALG_FIX_RANK_UPDATES for (int row = 0; row < A.extent(0); ++row) { for (int col = row; col < A.extent(1); ++col) { @@ -180,9 +181,9 @@ namespace { mdspan x = problem.x_view(); mdspan y = problem.y_view(); mdspan x_yT = problem.x_yT_view(); - const char what[] = " is wrong for A = 1.0 x y^T"; + const char what[] = " is wrong for A = (1.0 x) y^T"; - matrix_rank_1_update(1.0, x, y, A); + matrix_rank_1_update(scaled(1.0, x), y, A); for (int row = 0; row < A.extent(0); ++row) { for (int col = row; col < A.extent(1); ++col) { EXPECT_EQ(A(row, col), x_yT(row, col)) @@ -198,9 +199,9 @@ namespace { mdspan x = problem.x_view(); mdspan y = problem.y_view(); mdspan two_x_yT = problem.two_x_yT_view(); - const char what[] = " is wrong for A = 2.0 x y^T"; + const char what[] = " is wrong for A = (2.0 x) y^T"; - matrix_rank_1_update(2.0, x, y, A); + matrix_rank_1_update(scaled(2.0, x), y, A); for (int row = 0; row < A.extent(0); ++row) { for (int col = row; col < A.extent(1); ++col) { EXPECT_EQ(A(row, col), two_x_yT(row, col)) @@ -214,10 +215,11 @@ namespace { mdspan A = problem.A_view(); mdspan A_original = problem.A_original_view(); mdspan x = problem.x_view(); + mdspan y = problem.y_view(); mdspan A_plus_two_x_yT = problem.A_plus_two_x_yT_view(); - const char what[] = " is wrong for A = A + 2.0 x y^T"; + const char what[] = " is wrong for A = A + (2.0 x) y^T"; - matrix_rank_1_update(2.0, x, y, A, A); + matrix_rank_1_update(scaled(2.0, x), y, A, A); for (int row = 0; row < A.extent(0); ++row) { for (int col = row; col < A.extent(1); ++col) { EXPECT_EQ(A(row, col), A_plus_two_x_yT(row, col)) From 1aacd940bc76a9df719e94715cb7c1c678e98bc1 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 6 Oct 2024 19:08:31 -0600 Subject: [PATCH 07/10] P3371R1: Implement matrix_rank_1_update_c Change existing matrix_rank_1_update_c overloads to be overwriting instead of unconditionally updating, and add updating matrix_rank_1_update_c overloads. Guard changes with the appropriate macro, which by default is NOT defined. Add lots of tests to ensure coverage of all cases. Tests pass whether or not P3371 support is enabled. --- .../blas2_matrix_rank_1_update.hpp | 62 ++++- tests/native/CMakeLists.txt | 1 + tests/native/gerc.cpp | 216 ++++++++++++++++++ 3 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 tests/native/gerc.cpp diff --git a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp index c81b975e..1fd9d1cd 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_rank_1_update.hpp @@ -608,6 +608,59 @@ void matrix_rank_1_update_c( matrix_rank_1_update(x, conjugated(y), A); } +#if defined(LINALG_FIX_RANK_UPDATES) +template +void matrix_rank_1_update_c( + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_A, Accessor_A> A) +{ + matrix_rank_1_update(x, conjugated(y), E, A); +} +#endif // LINALG_FIX_RANK_UPDATES + +template +void matrix_rank_1_update_c( + ExecutionPolicy&& exec, + mdspan, Layout_x, Accessor_x> x, + mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_A, Accessor_A> A) +{ + matrix_rank_1_update(std::forward(exec), x, conjugated(y), A); +} + +#if defined(LINALG_FIX_RANK_UPDATES) template, Layout_x, Accessor_x> x, mdspan, Layout_y, Accessor_y> y, + mdspan, Layout_E, Accessor_E> E, mdspan, Layout_A, Accessor_A> A) { - matrix_rank_1_update(exec, x, conjugated(y), A); + matrix_rank_1_update(std::forward(exec), x, conjugated(y), E, A); } +#endif // LINALG_FIX_RANK_UPDATES // Symmetric matrix rank-1 update diff --git a/tests/native/CMakeLists.txt b/tests/native/CMakeLists.txt index c6a2a00c..814763d2 100644 --- a/tests/native/CMakeLists.txt +++ b/tests/native/CMakeLists.txt @@ -24,6 +24,7 @@ linalg_add_test(gemm) linalg_add_test(gemv) linalg_add_test(gemv_no_ambig) linalg_add_test(ger) +linalg_add_test(gerc) linalg_add_test(givens) linalg_add_test(hemm) linalg_add_test(herk) diff --git a/tests/native/gerc.cpp b/tests/native/gerc.cpp new file mode 100644 index 00000000..8498ffef --- /dev/null +++ b/tests/native/gerc.cpp @@ -0,0 +1,216 @@ +#include "./gtest_fixtures.hpp" + +namespace { + using LinearAlgebra::lower_triangle; + using LinearAlgebra::matrix_rank_1_update_c; + using LinearAlgebra::scaled; + using LinearAlgebra::upper_triangle; + + // A = [1.0 2.0 3.0] + // [4.0 11.0 8.0] + // [5.0 3.0 7.0 + 1.0i] + // + // x = [ 2.0 + 1.0i] + // [ 5.0 - 3.0i] + // [ 11.0 + 5.0i] + // + // y = [ 3.0 + 1.0i] + // [ 7.0 - 3.0i] + // [ 13.0 + 5.0i] + class gerc_test_problem { + public: + using V = std::complex; + + std::vector A_original{ + V(1.0, 0.0), V( 2.0, 0.0), V(3.0, 0.0), + V(4.0, 0.0), V(11.0, 0.0), V(8.0, 0.0), + V(5.0, 0.0), V( 3.0, 0.0), V(7.0, 1.0) + }; + + std::vector A{ + V(1.0, 0.0), V( 2.0, 0.0), V(3.0, 0.0), + V(4.0, 0.0), V(11.0, 0.0), V(8.0, 0.0), + V(5.0, 0.0), V( 3.0, 0.0), V(7.0, 1.0) + }; + + std::vector x{ + V( 2.0, 1.0), + V( 5.0, -3.0), + V(11.0, 5.0) + }; + + std::vector y{ + V( 3.0, 1.0), + V( 7.0, -3.0), + V(13.0, 5.0) + }; + + std::vector A_plus_x_yT = { + V( 8.0, 1.0), V(13.0, 13.0), V( 34.0, 3.0), + V(16.0, -14.0), V(55.0, -6.0), V( 58.0, -64.0), + V(43.0, 4.0), V(65.0, 68.0), V(175.0, 11.0) + }; + +#if defined(LINALG_FIX_RANK_UPDATES) + std::vector x_yT = { + V( 7.0, 1.0), V(11.0, 13.0), V( 31.0, 3.0), + V(12.0, -14.0), V(44.0, -6.0), V( 50.0, -64.0), + V(38.0, 4.0), V(62.0, 68.0), V(168.0, 10.0) + }; + + std::vector two_x_yT = { + V(14.0, 2.0), V( 22.0, 26.0), V( 62.0, 6.0), + V(24.0, -28.0), V( 88.0, -12.0), V(100.0, -128.0), + V(76.0, 8.0), V(124.0, 136.0), V(336.0, 20.0) + }; + + std::vector A_plus_two_x_yT = { + V(15.0, 2.0), V( 24.0, 26.0), V( 65.0, 6.0), + V(28.0, -28.0), V( 99.0, -12.0), V(108.0, -128.0), + V(81.0, 8.0), V(127.0, 136.0), V(343.0, 21.0) + }; +#endif // LINALG_FIX_RANK_UPDATES + + using A_type = mdspan>; + using const_A_type = mdspan>; + using vec_type = mdspan>; + using result_type = mdspan>; + + A_type A_view() { + return A_type{A.data()}; + } + + const_A_type A_original_view() const { + return const_A_type{A_original.data()}; + } + + vec_type x_view() const { + return vec_type{x.data()}; + } + + vec_type y_view() const { + return vec_type{y.data()}; + } + + result_type A_plus_x_yT_view() const { + return result_type{A_plus_x_yT.data()}; + } + +#if defined(LINALG_FIX_RANK_UPDATES) + result_type x_yT_view() const { + return result_type{x_yT.data()}; + } + + result_type two_x_yT_view() const { + return result_type{two_x_yT.data()}; + } + + result_type A_plus_two_x_yT_view() const { + return result_type{A_plus_two_x_yT.data()}; + } +#endif // LINALG_FIX_RANK_UPDATES + }; + + TEST(BLAS3_ger, test0) + { + { + gerc_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan A_plus_x_yT = problem.A_plus_x_yT_view(); + const char what[] = " is wrong for A = A + (1.0 x) y^T"; + +#if defined(LINALG_FIX_RANK_UPDATES) + matrix_rank_1_update_c(scaled(1.0, x), y, A, A); +#else + matrix_rank_1_update_c(scaled(1.0, x), y, A); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + gerc_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan A_plus_x_yT = problem.A_plus_x_yT_view(); + const char what[] = " is wrong for A = A + x y^T"; + +#if defined(LINALG_FIX_RANK_UPDATES) + matrix_rank_1_update_c(x, y, A, A); +#else + matrix_rank_1_update_c(x, y, A); +#endif // LINALG_FIX_RANK_UPDATES + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + { + gerc_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan x_yT = problem.x_yT_view(); + const char what[] = " is wrong for A = (1.0 x) y^T"; + + matrix_rank_1_update_c(scaled(1.0, x), y, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + gerc_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan two_x_yT = problem.two_x_yT_view(); + const char what[] = " is wrong for A = (2.0 x) y^T"; + + matrix_rank_1_update_c(scaled(2.0, x), y, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), two_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } + + { + gerc_test_problem problem; + mdspan A = problem.A_view(); + mdspan A_original = problem.A_original_view(); + mdspan x = problem.x_view(); + mdspan y = problem.y_view(); + mdspan A_plus_two_x_yT = problem.A_plus_two_x_yT_view(); + const char what[] = " is wrong for A = A + (2.0 x) y^T"; + + matrix_rank_1_update_c(scaled(2.0, x), y, A, A); + for (int row = 0; row < A.extent(0); ++row) { + for (int col = row; col < A.extent(1); ++col) { + EXPECT_EQ(A(row, col), A_plus_two_x_yT(row, col)) + << "A(" << row << "," << col << ")" << what; + } + } + } +#endif // LINALG_FIX_RANK_UPDATES + } +} // end anonymous namespace From 01b7aef8e1291d16679a43e6398ec0c265a905aa Mon Sep 17 00:00:00 2001 From: iburyl Date: Mon, 28 Oct 2024 13:53:22 -0700 Subject: [PATCH 08/10] add P3371R1 for symmetric/hermitian_matrix_rank_k_update --- .../blas3_matrix_rank_k_update.hpp | 582 +++++++++++++++++- .../__p1673_bits/linalg_execpolicy_mapper.hpp | 20 +- tests/native/herk.cpp | 32 +- tests/native/syrk.cpp | 32 +- 4 files changed, 651 insertions(+), 15 deletions(-) diff --git a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp index d4f17a99..aff5a991 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp @@ -86,6 +86,43 @@ struct is_custom_sym_mat_rank_k_update_avail< : std::true_type{}; +template +struct is_custom_sym_mat_rank_k_update_with_update_avail : std::false_type {}; + +template +struct is_custom_sym_mat_rank_k_update_with_update_avail< + Exec, void, A_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_sym_mat_rank_k_update_with_update_avail< + Exec, ScaleFactorType, A_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + + template struct is_custom_herm_mat_rank_k_update_avail : std::false_type {}; @@ -120,6 +157,43 @@ struct is_custom_herm_mat_rank_k_update_avail< > : std::true_type{}; +template +struct is_custom_herm_mat_rank_k_update_with_update_avail : std::false_type {}; + + +template +struct is_custom_herm_mat_rank_k_update_with_update_avail< + Exec, void, A_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_herm_mat_rank_k_update_with_update_avail< + Exec, ScaleFactorType, A_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + } //end anonym namespace // Rank-k update of a symmetric matrix with scaling factor alpha @@ -136,8 +210,9 @@ MDSPAN_TEMPLATE_REQUIRES( class Accessor_C, class Triangle, /* requires */ ( - std::is_same_v || - std::is_same_v + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v ) ) void symmetric_matrix_rank_k_update( @@ -155,6 +230,7 @@ void symmetric_matrix_rank_k_update( const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = ElementType_C{}; for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += alpha * A(i, k) * A(j, k); } @@ -177,7 +253,9 @@ MDSPAN_TEMPLATE_REQUIRES( /* requires */ ( impl::is_linalg_execution_policy_other_than_inline_v && (std::is_same_v || - std::is_same_v) + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) ) void symmetric_matrix_rank_k_update( @@ -212,7 +290,9 @@ MDSPAN_TEMPLATE_REQUIRES( /* requires */ ( (! impl::is_linalg_execution_policy_other_than_inline_v) && (std::is_same_v || - std::is_same_v) + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) ) void symmetric_matrix_rank_k_update( @@ -255,6 +335,7 @@ void symmetric_matrix_rank_k_update( const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = ElementType_C{}; for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += A(i, k) * A(j, k); } @@ -319,9 +400,96 @@ void symmetric_matrix_rank_k_update( symmetric_matrix_rank_k_update(impl::default_exec_t{}, A, C, t); } -// Rank-k update of a Hermitian matrix +// Rank-k update of a symmetric matrix (updating versions) -// Rank-k update of a Hermitian matrix with scaling factor alpha +// Rank-k update of a symmetric matrix with scaling factor alpha + +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) +) +void symmetric_matrix_rank_k_update( + impl::inline_exec_t&& /* exec */, + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + constexpr bool lower_tri = + std::is_same_v; + using size_type = std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = E(i, j); + for (size_type k = 0; k < A.extent(1); ++k) { + C(i, j) += alpha * A(i, k) * A(j, k); + } + } + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) +) +void symmetric_matrix_rank_k_update( + ExecutionPolicy&& exec, + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_sym_mat_rank_k_update_with_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + ScaleFactorType, decltype(A), decltype(E), decltype(C), Triangle>::value; + + if constexpr (use_custom) { + symmetric_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), alpha, A, E, C, t); + } else { + symmetric_matrix_rank_k_update(impl::inline_exec_t{}, alpha, A, E, C, t); + } +} MDSPAN_TEMPLATE_REQUIRES( class ScaleFactorType, @@ -329,6 +497,43 @@ MDSPAN_TEMPLATE_REQUIRES( class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, class Layout_A, class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (! impl::is_linalg_execution_policy_other_than_inline_v) && + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) +) +void symmetric_matrix_rank_k_update( + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + symmetric_matrix_rank_k_update(impl::default_exec_t{}, alpha, A, E, C, t); +} + +// Rank-k update of a symmetric matrix without scaling factor alpha + +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, class ElementType_C, class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, class Layout_C, @@ -339,6 +544,117 @@ MDSPAN_TEMPLATE_REQUIRES( std::is_same_v ) ) +void symmetric_matrix_rank_k_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + constexpr bool lower_tri = + std::is_same_v; + using size_type = std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = E(i, j); + for (size_type k = 0; k < A.extent(1); ++k) { + C(i, j) += A(i, k) * A(j, k); + } + } + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) + ) +) +void symmetric_matrix_rank_k_update( + ExecutionPolicy&& exec, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_sym_mat_rank_k_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), void, decltype(A), decltype(E), decltype(C), Triangle + >::value; + + if constexpr (use_custom) { + symmetric_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), A, E, C, t); + } else { + symmetric_matrix_rank_k_update(impl::inline_exec_t{}, A, E, C, t); + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + std::is_same_v || + std::is_same_v + ) +) +void symmetric_matrix_rank_k_update( + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + symmetric_matrix_rank_k_update(impl::default_exec_t{}, A, E, C, t); +} + +// Rank-k update of a Hermitian matrix + +// Rank-k update of a Hermitian matrix with scaling factor alpha + +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v + ) +) void hermitian_matrix_rank_k_update( impl::inline_exec_t&& /* exec */, ScaleFactorType alpha, @@ -355,8 +671,9 @@ void hermitian_matrix_rank_k_update( for (size_type j = 0; j < C.extent(1); ++j) { const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; - C(j, j) = impl::real_if_needed(C(j, j)); + //C(j, j) = impl::real_if_needed(C(j, j)); for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = ElementType_C{}; for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += alpha * A(i, k) * impl::conj_if_needed(A(j, k)); } @@ -379,7 +696,8 @@ MDSPAN_TEMPLATE_REQUIRES( /* requires */ ( impl::is_linalg_execution_policy_other_than_inline_v && (std::is_same_v || - std::is_same_v) + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v ) ) void hermitian_matrix_rank_k_update( @@ -414,7 +732,8 @@ MDSPAN_TEMPLATE_REQUIRES( /* requires */ ( (! impl::is_linalg_execution_policy_other_than_inline_v) && (std::is_same_v || - std::is_same_v) + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v ) ) void hermitian_matrix_rank_k_update( @@ -456,10 +775,11 @@ void hermitian_matrix_rank_k_update( using size_type = std::common_type_t; for (size_type j = 0; j < C.extent(1); ++j) { - C(j, j) = impl::real_if_needed(C(j, j)); + //C(j, j) = impl::real_if_needed(C(j, j)); const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = ElementType_C{}; for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += A(i, k) * impl::conj_if_needed(A(j, k)); } @@ -524,6 +844,248 @@ void hermitian_matrix_rank_k_update( hermitian_matrix_rank_k_update(impl::default_exec_t{}, A, C, t); } +// Rank-k update of a Hermitian matrix with scaling factor alpha (updating version) + +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + impl::inline_exec_t&& /* exec */, + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + using size_type = std::common_type_t; + + constexpr bool lower_tri = + std::is_same_v; + using size_type = std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + C(j, j) = impl::real_if_needed(E(j, j)); + for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = E(i, j); + for (size_type k = 0; k < A.extent(1); ++k) { + C(i, j) += alpha * A(i, k) * impl::conj_if_needed(A(j, k)); + } + } + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + ExecutionPolicy&& exec, + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_herm_mat_rank_k_update_with_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + ScaleFactorType, decltype(A), decltype(E), decltype(C), Triangle>::value; + + if constexpr (use_custom) { + hermitian_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), alpha, A, E, C, t); + } else { + hermitian_matrix_rank_k_update(impl::inline_exec_t{}, alpha, A, E, C, t); + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ScaleFactorType, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (! impl::is_linalg_execution_policy_other_than_inline_v) && + (std::is_same_v || + std::is_same_v) && + !impl::is_linalg_execution_policy_v && !impl::is_mdspan_v && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + ScaleFactorType alpha, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + hermitian_matrix_rank_k_update(impl::default_exec_t{}, alpha, A, E, C, t); +} + +// Rank-k update of a Hermitian matrix without scaling factor alpha (updating version) + +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + using size_type = std::common_type_t; + + constexpr bool lower_tri = + std::is_same_v; + using size_type = std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + C(j, j) = impl::real_if_needed(C(j, j)); + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + for (size_type i = i_lower; i < i_upper; ++i) { + C(i, j) = E(i, j); + for (size_type k = 0; k < A.extent(1); ++k) { + C(i, j) += A(i, k) * impl::conj_if_needed(A(j, k)); + } + } + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ExecutionPolicy, + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + impl::is_linalg_execution_policy_other_than_inline_v && + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + ExecutionPolicy&& exec, + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_herm_mat_rank_k_update_with_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), + void, decltype(A), decltype(E), decltype(C), Triangle>::value; + + if constexpr (use_custom) { + hermitian_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), A, E, C, t); + } else { + hermitian_matrix_rank_k_update(impl::inline_exec_t{}, A, E, C, t); + } +} + +MDSPAN_TEMPLATE_REQUIRES( + class ElementType_A, + class SizeType_A, ::std::size_t numRows_A, ::std::size_t numCols_A, + class Layout_A, + class Accessor_A, + class ElementType_E, + class Extents_E, + class Layout_E, + class Accessor_E, + class ElementType_C, + class SizeType_C, ::std::size_t numRows_C, ::std::size_t numCols_C, + class Layout_C, + class Accessor_C, + class Triangle, + /* requires */ ( + (std::is_same_v || + std::is_same_v) && + Extents_E::rank() == 2 + ) +) +void hermitian_matrix_rank_k_update( + mdspan, Layout_A, Accessor_A> A, + mdspan E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + hermitian_matrix_rank_k_update(impl::default_exec_t{}, A, E, C, t); +} + } // end namespace linalg } // end inline namespace __p1673_version_0 } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE diff --git a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp index 26a30b9d..0f2135dc 100644 --- a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp +++ b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp @@ -29,7 +29,7 @@ template inline constexpr bool is_custom_linalg_execution_policy_v = std::is_same_v || std::is_same_v; -// value is true if and only if T is _not_ inline_exec, and if T is +// value is true if and only if T is // // * a Standard execution policy (like std::execution::parallel_policy), // * one of the C++ implementation-specific execution policies, or @@ -40,8 +40,7 @@ inline constexpr bool is_custom_linalg_execution_policy_v = // unconstrained template parameters like ScaleFactorType in // algorithms like symmetric_matrix_rank_k_update. template -inline constexpr bool is_linalg_execution_policy_other_than_inline_v = - ! is_inline_exec_v && +inline constexpr bool is_linalg_execution_policy_v = ( #ifdef LINALG_HAS_EXECUTION std::is_execution_policy_v || @@ -49,6 +48,21 @@ inline constexpr bool is_linalg_execution_policy_other_than_inline_v = is_custom_linalg_execution_policy_v ); +// value is true if and only if T is _not_ inline_exec, and if T is +// +// * a Standard execution policy (like std::execution::parallel_policy), +// * one of the C++ implementation-specific execution policies, or +// * one of the std::linalg-specific custom execution policies +// (other than inline_exec). +// +// This helps disambiguate ExecutionPolicy from otherwise +// unconstrained template parameters like ScaleFactorType in +// algorithms like symmetric_matrix_rank_k_update. +template +inline constexpr bool is_linalg_execution_policy_other_than_inline_v = + ! is_inline_exec_v && + is_linalg_execution_policy_v; + } // namespace impl } // namespace linalg } // inline namespace __p1673_version_0 diff --git a/tests/native/herk.cpp b/tests/native/herk.cpp index 71cd7e76..95c1bb17 100644 --- a/tests/native/herk.cpp +++ b/tests/native/herk.cpp @@ -3,6 +3,7 @@ namespace { using LinearAlgebra::symmetric_matrix_rank_k_update; using LinearAlgebra::transposed; + using LinearAlgebra::scaled; using LinearAlgebra::upper_triangle; // This is a regression test that follows on from @@ -43,6 +44,7 @@ namespace { std::array WC = WC_original; mdspan C{WC.data(), map_C}; + mdspan C_original{WC_original.data(), map_C}; std::array WA = WA_original; mdspan A{WA.data(), layout_left::mapping{extents{}}}; @@ -54,7 +56,7 @@ namespace { for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { - const auto expected_rc = expected(row, col); + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } @@ -67,6 +69,20 @@ namespace { hermitian_matrix_rank_k_update(A, C, upper_triangle); + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + hermitian_matrix_rank_k_update(A, C, C, upper_triangle); + for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { const auto expected_rc = expected(row, col); @@ -74,6 +90,20 @@ namespace { EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + hermitian_matrix_rank_k_update(A, scaled(2., C), C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)+C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } } } // end anonymous namespace diff --git a/tests/native/syrk.cpp b/tests/native/syrk.cpp index 8f0dec0e..c1ee95d3 100644 --- a/tests/native/syrk.cpp +++ b/tests/native/syrk.cpp @@ -3,6 +3,7 @@ namespace { using LinearAlgebra::symmetric_matrix_rank_k_update; using LinearAlgebra::transposed; + using LinearAlgebra::scaled; using LinearAlgebra::upper_triangle; // Regression test for https://github.com/kokkos/stdBLAS/issues/261 @@ -42,6 +43,7 @@ namespace { std::array WC = WC_original; mdspan C{WC.data(), map_C}; + mdspan C_original{WC_original.data(), map_C}; std::array WA = WA_original; mdspan A{WA.data(), layout_left::mapping{extents{}}}; @@ -53,7 +55,7 @@ namespace { for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { - const auto expected_rc = expected(row, col); + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } @@ -66,6 +68,20 @@ namespace { symmetric_matrix_rank_k_update(A, C, upper_triangle); + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + symmetric_matrix_rank_k_update(A, C, C, upper_triangle); + for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { const auto expected_rc = expected(row, col); @@ -73,6 +89,20 @@ namespace { EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + symmetric_matrix_rank_k_update(A, scaled(2., C), C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)+C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } } } // end anonymous namespace From d0bdbe3f1e62efea60f4e60a27c6aa59a4aad8e3 Mon Sep 17 00:00:00 2001 From: iburyl Date: Tue, 29 Oct 2024 15:17:06 -0700 Subject: [PATCH 09/10] moved changes in rank-k under LINALG_FIX_RANK_UPDATES macro + added implementation and testing for rank 2k --- .../blas3_matrix_rank_2k_update.hpp | 328 +++++++++++++++++- .../blas3_matrix_rank_k_update.hpp | 138 +++++++- tests/native/CMakeLists.txt | 2 + tests/native/her2k.cpp | 120 +++++++ tests/native/herk.cpp | 19 +- tests/native/syr2k.cpp | 119 +++++++ tests/native/syrk.cpp | 12 + 7 files changed, 716 insertions(+), 22 deletions(-) create mode 100644 tests/native/her2k.cpp create mode 100644 tests/native/syr2k.cpp diff --git a/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp b/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp index de8dae1b..0d7bfd60 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp @@ -50,6 +50,47 @@ namespace linalg { namespace{ +#if defined(LINALG_FIX_RANK_UPDATES) + +template +struct is_custom_sym_mat_rank_2k_update_avail : std::false_type {}; + +template +struct is_custom_sym_mat_rank_2k_update_avail< + Exec, A_t, B_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_2k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_sym_mat_rank_2k_update_avail< + Exec, A_t, B_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_2k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + + +#else + template struct is_custom_sym_mat_rank_2k_update_avail : std::false_type {}; @@ -69,6 +110,48 @@ struct is_custom_sym_mat_rank_2k_update_avail< > : std::true_type{}; +#endif + +#if defined(LINALG_FIX_RANK_UPDATES) + +template +struct is_custom_herm_mat_rank_2k_update_avail : std::false_type {}; + +template +struct is_custom_herm_mat_rank_2k_update_avail< + Exec, A_t, B_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_2k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_herm_mat_rank_2k_update_avail< + Exec, A_t, B_t, E_t, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_2k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +#else + template struct is_custom_herm_mat_rank_2k_update_avail : std::false_type {}; @@ -88,6 +171,8 @@ struct is_custom_herm_mat_rank_2k_update_avail< > : std::true_type{}; +#endif + } // end anonym namespace // Rank-2k update of a symmetric matrix @@ -120,6 +205,9 @@ void symmetric_matrix_rank_2k_update( const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) + C(i,j) = ElementType_C{}; +#endif for (size_type k = 0; k < A.extent(1); ++k) { C(i,j) += A(i,k)*B(j,k) + B(i,k)*A(j,k); } @@ -149,7 +237,11 @@ void symmetric_matrix_rank_2k_update( Triangle t) { constexpr bool use_custom = is_custom_sym_mat_rank_2k_update_avail< - decltype(impl::map_execpolicy_with_check(exec)), decltype(A), decltype(B), decltype(C), Triangle + decltype(impl::map_execpolicy_with_check(exec)), decltype(A), decltype(B), +#if defined(LINALG_FIX_RANK_UPDATES) + void, +#endif + decltype(C), Triangle >::value; if constexpr (use_custom) { @@ -181,8 +273,118 @@ void symmetric_matrix_rank_2k_update( symmetric_matrix_rank_2k_update(impl::default_exec_t{}, A, B, C, t); } +#if defined(LINALG_FIX_RANK_UPDATES) -// Rank-2k update of a Hermitian matrix +// Rank-2k update of a symmetric matrix + +template +void symmetric_matrix_rank_2k_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + constexpr bool lower_tri = + std::is_same_v; + using size_type = ::std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + for (size_type i = i_lower; i < i_upper; ++i) { + C(i,j) = E(i,j); + for (size_type k = 0; k < A.extent(1); ++k) { + C(i,j) += A(i,k)*B(j,k) + B(i,k)*A(j,k); + } + } + } +} + +template +void symmetric_matrix_rank_2k_update( + ExecutionPolicy&& exec , + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_sym_mat_rank_2k_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), decltype(A), decltype(B), decltype(E), decltype(C), Triangle + >::value; + + if constexpr (use_custom) { + symmetric_matrix_rank_2k_update(impl::map_execpolicy_with_check(exec), A, B, E, C, t); + } else { + symmetric_matrix_rank_2k_update(impl::inline_exec_t{}, A, B, E, C, t); + } +} + +template +void symmetric_matrix_rank_2k_update( + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle t ) +{ + symmetric_matrix_rank_2k_update(impl::default_exec_t{}, A, B, E, C, t); +} + +#endif + +// Overwriting Rank-2k update of a Hermitian matrix template::value; if constexpr (use_custom) { @@ -274,6 +485,117 @@ void hermitian_matrix_rank_2k_update( hermitian_matrix_rank_2k_update(impl::default_exec_t{}, A, B, C, t); } +#if defined(LINALG_FIX_RANK_UPDATES) + +// Rank-2k update of a Hermitian matrix + +template +void hermitian_matrix_rank_2k_update( + impl::inline_exec_t&& /* exec */, + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle /* t */) +{ + constexpr bool lower_tri = + std::is_same_v; + using size_type = ::std::common_type_t; + + for (size_type j = 0; j < C.extent(1); ++j) { + const size_type i_lower = lower_tri ? j : size_type(0); + const size_type i_upper = lower_tri ? C.extent(0) : j+1; + C(j,j) = impl::real_if_needed(E(j,j)); + for (size_type i = i_lower; i < i_upper; ++i) { + for (size_type k = 0; k < A.extent(1); ++k) { + C(i,j) += A(i,k) * impl::conj_if_needed(B(j,k)) + B(i,k) * impl::conj_if_needed(A(j,k)); + } + } + } +} + +template +void hermitian_matrix_rank_2k_update( + ExecutionPolicy&& exec, + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle t) +{ + constexpr bool use_custom = is_custom_herm_mat_rank_2k_update_avail< + decltype(impl::map_execpolicy_with_check(exec)), decltype(A), decltype(B), decltype(E), decltype(C), Triangle + >::value; + + if constexpr (use_custom) { + hermitian_matrix_rank_2k_update(impl::map_execpolicy_with_check(exec), A, B, E, C, t); + } else { + hermitian_matrix_rank_2k_update(impl::inline_exec_t{}, A, B, E, C, t); + } +} + +template +void hermitian_matrix_rank_2k_update( + mdspan, Layout_A, Accessor_A> A, + mdspan, Layout_B, Accessor_B> B, + mdspan, Layout_E, Accessor_E> E, + mdspan, Layout_C, Accessor_C> C, + Triangle t ) +{ + hermitian_matrix_rank_2k_update(impl::default_exec_t{}, A, B, E, C, t); +} + +#endif + } // end namespace linalg } // end inline namespace __p1673_version_0 } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE diff --git a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp index aff5a991..4179565d 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp @@ -51,6 +51,8 @@ namespace linalg { namespace { +#if !defined(LINALG_FIX_RANK_UPDATES) + template struct is_custom_sym_mat_rank_k_update_avail : std::false_type {}; @@ -85,12 +87,44 @@ struct is_custom_sym_mat_rank_k_update_avail< > : std::true_type{}; +#else template -struct is_custom_sym_mat_rank_k_update_with_update_avail : std::false_type {}; +struct is_custom_sym_mat_rank_k_update_avail : std::false_type {}; + +template +struct is_custom_sym_mat_rank_k_update_avail< + Exec, void, A_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_sym_mat_rank_k_update_avail< + Exec, ScaleFactorType, A_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(symmetric_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; template -struct is_custom_sym_mat_rank_k_update_with_update_avail< +struct is_custom_sym_mat_rank_k_update_avail< Exec, void, A_t, E_t, C_t, Tr_t, std::enable_if_t< std::is_void_v< @@ -106,7 +140,7 @@ struct is_custom_sym_mat_rank_k_update_with_update_avail< : std::true_type{}; template -struct is_custom_sym_mat_rank_k_update_with_update_avail< +struct is_custom_sym_mat_rank_k_update_avail< Exec, ScaleFactorType, A_t, E_t, C_t, Tr_t, std::enable_if_t< std::is_void_v< @@ -122,6 +156,9 @@ struct is_custom_sym_mat_rank_k_update_with_update_avail< > : std::true_type{}; +#endif + +#if !defined(LINALG_FIX_RANK_UPDATES) template struct is_custom_herm_mat_rank_k_update_avail : std::false_type {}; @@ -157,12 +194,44 @@ struct is_custom_herm_mat_rank_k_update_avail< > : std::true_type{}; +#else + template -struct is_custom_herm_mat_rank_k_update_with_update_avail : std::false_type {}; +struct is_custom_herm_mat_rank_k_update_avail : std::false_type {}; +template +struct is_custom_herm_mat_rank_k_update_avail< + Exec, void, A_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; + +template +struct is_custom_herm_mat_rank_k_update_avail< + Exec, ScaleFactorType, A_t, void, C_t, Tr_t, + std::enable_if_t< + std::is_void_v< + decltype(hermitian_matrix_rank_k_update(std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval())) + > + && ! impl::is_inline_exec_v + > + > + : std::true_type{}; template -struct is_custom_herm_mat_rank_k_update_with_update_avail< +struct is_custom_herm_mat_rank_k_update_avail< Exec, void, A_t, E_t, C_t, Tr_t, std::enable_if_t< std::is_void_v< @@ -178,7 +247,7 @@ struct is_custom_herm_mat_rank_k_update_with_update_avail< : std::true_type{}; template -struct is_custom_herm_mat_rank_k_update_with_update_avail< +struct is_custom_herm_mat_rank_k_update_avail< Exec, ScaleFactorType, A_t, E_t, C_t, Tr_t, std::enable_if_t< std::is_void_v< @@ -194,6 +263,8 @@ struct is_custom_herm_mat_rank_k_update_with_update_avail< > : std::true_type{}; +#endif + } //end anonym namespace // Rank-k update of a symmetric matrix with scaling factor alpha @@ -230,7 +301,9 @@ void symmetric_matrix_rank_k_update( const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) C(i, j) = ElementType_C{}; +#endif for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += alpha * A(i, k) * A(j, k); } @@ -267,7 +340,11 @@ void symmetric_matrix_rank_k_update( { constexpr bool use_custom = is_custom_sym_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), - ScaleFactorType, decltype(A), decltype(C), Triangle>::value; + ScaleFactorType, decltype(A), +#if defined(LINALG_FIX_RANK_UPDATES) + void, +#endif + decltype(C), Triangle>::value; if constexpr (use_custom) { symmetric_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), alpha, A, C, t); @@ -335,7 +412,9 @@ void symmetric_matrix_rank_k_update( const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) C(i, j) = ElementType_C{}; +#endif for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += A(i, k) * A(j, k); } @@ -367,7 +446,11 @@ void symmetric_matrix_rank_k_update( Triangle t) { constexpr bool use_custom = is_custom_sym_mat_rank_k_update_avail< - decltype(impl::map_execpolicy_with_check(exec)), void, decltype(A), decltype(C), Triangle + decltype(impl::map_execpolicy_with_check(exec)), void, decltype(A), +#if defined(LINALG_FIX_RANK_UPDATES) + void, +#endif + decltype(C), Triangle >::value; if constexpr (use_custom) { @@ -400,6 +483,8 @@ void symmetric_matrix_rank_k_update( symmetric_matrix_rank_k_update(impl::default_exec_t{}, A, C, t); } +#if defined(LINALG_FIX_RANK_UPDATES) + // Rank-k update of a symmetric matrix (updating versions) // Rank-k update of a symmetric matrix with scaling factor alpha @@ -480,7 +565,7 @@ void symmetric_matrix_rank_k_update( mdspan, Layout_C, Accessor_C> C, Triangle t) { - constexpr bool use_custom = is_custom_sym_mat_rank_k_update_with_update_avail< + constexpr bool use_custom = is_custom_sym_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), ScaleFactorType, decltype(A), decltype(E), decltype(C), Triangle>::value; @@ -634,6 +719,8 @@ void symmetric_matrix_rank_k_update( symmetric_matrix_rank_k_update(impl::default_exec_t{}, A, E, C, t); } +#endif + // Rank-k update of a Hermitian matrix // Rank-k update of a Hermitian matrix with scaling factor alpha @@ -671,9 +758,13 @@ void hermitian_matrix_rank_k_update( for (size_type j = 0; j < C.extent(1); ++j) { const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; - //C(j, j) = impl::real_if_needed(C(j, j)); +#if !defined(LINALG_FIX_RANK_UPDATES) + C(j, j) = impl::real_if_needed(C(j, j)); +#endif for (size_type i = i_lower; i < i_upper; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) C(i, j) = ElementType_C{}; +#endif for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += alpha * A(i, k) * impl::conj_if_needed(A(j, k)); } @@ -709,7 +800,11 @@ void hermitian_matrix_rank_k_update( { constexpr bool use_custom = is_custom_herm_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), - ScaleFactorType, decltype(A), decltype(C), Triangle>::value; + ScaleFactorType, decltype(A), +#if defined(LINALG_FIX_RANK_UPDATES) + void, +#endif + decltype(C), Triangle>::value; if constexpr (use_custom) { hermitian_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), alpha, A, C, t); @@ -775,11 +870,15 @@ void hermitian_matrix_rank_k_update( using size_type = std::common_type_t; for (size_type j = 0; j < C.extent(1); ++j) { - //C(j, j) = impl::real_if_needed(C(j, j)); +#if !defined(LINALG_FIX_RANK_UPDATES) + C(j, j) = impl::real_if_needed(C(j, j)); +#endif const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { +#if defined(LINALG_FIX_RANK_UPDATES) C(i, j) = ElementType_C{}; +#endif for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += A(i, k) * impl::conj_if_needed(A(j, k)); } @@ -812,7 +911,11 @@ void hermitian_matrix_rank_k_update( { constexpr bool use_custom = is_custom_herm_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), - void, decltype(A), decltype(C), Triangle>::value; + void, decltype(A), +#if defined(LINALG_FIX_RANK_UPDATES) + void, +#endif + decltype(C), Triangle>::value; if constexpr (use_custom) { hermitian_matrix_rank_k_update(impl::map_execpolicy_with_check(exec), A, C, t); @@ -844,6 +947,9 @@ void hermitian_matrix_rank_k_update( hermitian_matrix_rank_k_update(impl::default_exec_t{}, A, C, t); } + +#if defined(LINALG_FIX_RANK_UPDATES) + // Rank-k update of a Hermitian matrix with scaling factor alpha (updating version) MDSPAN_TEMPLATE_REQUIRES( @@ -925,7 +1031,7 @@ void hermitian_matrix_rank_k_update( mdspan, Layout_C, Accessor_C> C, Triangle t) { - constexpr bool use_custom = is_custom_herm_mat_rank_k_update_with_update_avail< + constexpr bool use_custom = is_custom_herm_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), ScaleFactorType, decltype(A), decltype(E), decltype(C), Triangle>::value; @@ -1046,7 +1152,7 @@ void hermitian_matrix_rank_k_update( mdspan, Layout_C, Accessor_C> C, Triangle t) { - constexpr bool use_custom = is_custom_herm_mat_rank_k_update_with_update_avail< + constexpr bool use_custom = is_custom_herm_mat_rank_k_update_avail< decltype(impl::map_execpolicy_with_check(exec)), void, decltype(A), decltype(E), decltype(C), Triangle>::value; @@ -1086,6 +1192,8 @@ void hermitian_matrix_rank_k_update( hermitian_matrix_rank_k_update(impl::default_exec_t{}, A, E, C, t); } +#endif + } // end namespace linalg } // end inline namespace __p1673_version_0 } // end namespace MDSPAN_IMPL_PROPOSED_NAMESPACE diff --git a/tests/native/CMakeLists.txt b/tests/native/CMakeLists.txt index e51ee016..eef1a073 100644 --- a/tests/native/CMakeLists.txt +++ b/tests/native/CMakeLists.txt @@ -27,6 +27,7 @@ linalg_add_test(ger) linalg_add_test(gerc) linalg_add_test(givens) linalg_add_test(hemm) +linalg_add_test(her2k) linalg_add_test(herk) linalg_add_test(her) linalg_add_test(idx_abs_max) @@ -43,6 +44,7 @@ linalg_add_test(scaled) linalg_add_test(swap) linalg_add_test(symm) linalg_add_test(syr) +linalg_add_test(syr2k) linalg_add_test(syrk) linalg_add_test(transposed) linalg_add_test(trmm) diff --git a/tests/native/her2k.cpp b/tests/native/her2k.cpp new file mode 100644 index 00000000..68a57409 --- /dev/null +++ b/tests/native/her2k.cpp @@ -0,0 +1,120 @@ +#include "./gtest_fixtures.hpp" + +namespace { + using LinearAlgebra::symmetric_matrix_rank_k_update; + using LinearAlgebra::transposed; + using LinearAlgebra::scaled; + using LinearAlgebra::upper_triangle; + + // This is a regression test that follows on from + // https://github.com/kokkos/stdBLAS/issues/261 . + // + // The reference implementation needs to implement all constraints + // of hermitian_matrix_rank_k_update in order to disambiguate + // overloads. + TEST(BLAS3_herk, AmbiguousOverloads) + { + constexpr auto map_C = layout_left::mapping{extents{}}; + constexpr auto map_expected = map_C; + + // Fill in the "empty spots" with a large negative value. + // The algorithm should never see it, but if it does, + // then the results will be wrong in an obvious way. + constexpr double flag = -1000.0; + constexpr std::array WA_original{ + 1.0, 2.0, 3.0, 3.0, 5.0, 6.0}; + // [1.0 3.0] + // [2.0 5.0] + // [3.0 6.0] + constexpr std::array WC_original{ + 1.0, flag, flag, 3.0, 4.0, flag, 2.0, 5.0, 7.0}; + // [1.0 3.0 2.0] + // [*** 4.0 5.0] + // [*** *** 7.0] + + // [1.0 3.0] [1.0 2.0 3.0] [10.0 17.0 21.0] + // [2.0 5.0] * [3.0 5.0 6.0] = [17.0 29.0 36.0] + // [3.0 6.0] [21.0 36.0 45.0] + // + // [1.0 3.0 2.0] [10.0 17.0 21.0] [11.0 20.0 23.0] + // [*** 4.0 5.0] + [17.0 29.0 36.0] = [ *** 33.0 41.0] + // [*** *** 7.0] [21.0 36.0 45.0] [ *** *** 52.0] + constexpr std::array expected_storage_original{ + 11.0, flag, flag, 20.0, 33.0, flag, 23.0, 41.0, 52.0}; + + std::array WC = WC_original; + mdspan C{WC.data(), map_C}; + mdspan C_original{WC_original.data(), map_C}; + + std::array WA = WA_original; + mdspan A{WA.data(), layout_left::mapping{extents{}}}; + + std::array expected_storage = expected_storage_original; + mdspan expected{expected_storage.data(), map_expected}; + + hermitian_matrix_rank_k_update(1.0, A, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + // Reset values, just in case some bug might have overwritten them. + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + hermitian_matrix_rank_k_update(A, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + hermitian_matrix_rank_k_update(A, C, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = expected(row, col); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + hermitian_matrix_rank_k_update(A, scaled(2., C), C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)+C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } +#endif + } + +} // end anonymous namespace diff --git a/tests/native/herk.cpp b/tests/native/herk.cpp index 95c1bb17..a04961ac 100644 --- a/tests/native/herk.cpp +++ b/tests/native/herk.cpp @@ -52,11 +52,15 @@ namespace { std::array expected_storage = expected_storage_original; mdspan expected{expected_storage.data(), map_expected}; - hermitian_matrix_rank_k_update(1.0, A, C, upper_triangle); + hermitian_matrix_rank_2k_update(scaled(0.5, A), A,C, upper_triangle); for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } @@ -67,21 +71,27 @@ namespace { WC = WC_original; expected_storage = expected_storage_original; - hermitian_matrix_rank_k_update(A, C, upper_triangle); + hermitian_matrix_rank_2k_update(scaled(0.5, A), A,C, upper_triangle); for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } +#if defined(LINALG_FIX_RANK_UPDATES) + WA = WA_original; WC = WC_original; expected_storage = expected_storage_original; - hermitian_matrix_rank_k_update(A, C, C, upper_triangle); + hermitian_matrix_rank_2k_update(scaled(0.5, A), A,C, C, upper_triangle); for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { @@ -95,7 +105,7 @@ namespace { WC = WC_original; expected_storage = expected_storage_original; - hermitian_matrix_rank_k_update(A, scaled(2., C), C, upper_triangle); + hermitian_matrix_rank_2k_update(scaled(0.5, A), A,scaled(2., C), C, upper_triangle); for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { @@ -104,6 +114,7 @@ namespace { EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } +#endif } } // end anonymous namespace diff --git a/tests/native/syr2k.cpp b/tests/native/syr2k.cpp new file mode 100644 index 00000000..df4fa05d --- /dev/null +++ b/tests/native/syr2k.cpp @@ -0,0 +1,119 @@ +#include "./gtest_fixtures.hpp" + +namespace { + using LinearAlgebra::symmetric_matrix_rank_2k_update; + using LinearAlgebra::transposed; + using LinearAlgebra::scaled; + using LinearAlgebra::upper_triangle; + + // Regression test for https://github.com/kokkos/stdBLAS/issues/261 + // + // The reference implementation needs to implement all constraints + // of symmetric_matrix_rank_2k_update in order to disambiguate + // overloads. + TEST(BLAS3_syr2k, AmbiguousOverloads_Issue261) + { + constexpr auto map_C = layout_left::mapping{extents{}}; + constexpr auto map_expected = map_C; + + // Fill in the "empty spots" with a large negative value. + // The algorithm should never see it, but if it does, + // then the results will be wrong in an obvious way. + constexpr double flag = -1000.0; + constexpr std::array WA_original{ + 1.0, 2.0, 3.0, 3.0, 5.0, 6.0}; + // [1.0 3.0] + // [2.0 5.0] + // [3.0 6.0] + constexpr std::array WC_original{ + 1.0, flag, flag, 3.0, 4.0, flag, 2.0, 5.0, 7.0}; + // [1.0 3.0 2.0] + // [*** 4.0 5.0] + // [*** *** 7.0] + + // [1.0 3.0] [1.0 2.0 3.0] [10.0 17.0 21.0] + // [2.0 5.0] * [3.0 5.0 6.0] = [17.0 29.0 36.0] + // [3.0 6.0] [21.0 36.0 45.0] + // + // [1.0 3.0 2.0] [10.0 17.0 21.0] [11.0 20.0 23.0] + // [*** 4.0 5.0] + [17.0 29.0 36.0] = [ *** 33.0 41.0] + // [*** *** 7.0] [21.0 36.0 45.0] [ *** *** 52.0] + constexpr std::array expected_storage_original{ + 11.0, flag, flag, 20.0, 33.0, flag, 23.0, 41.0, 52.0}; + + std::array WC = WC_original; + mdspan C{WC.data(), map_C}; + mdspan C_original{WC_original.data(), map_C}; + + std::array WA = WA_original; + mdspan A{WA.data(), layout_left::mapping{extents{}}}; + + std::array expected_storage = expected_storage_original; + mdspan expected{expected_storage.data(), map_expected}; + + symmetric_matrix_rank_2k_update(scaled(0.5,A), A, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + // Reset values, just in case some bug might have overwritten them. + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + symmetric_matrix_rank_2k_update(scaled(0.5,A), A, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + +#if defined(LINALG_FIX_RANK_UPDATES) + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + symmetric_matrix_rank_2k_update(scaled(0.5,A), A, C, C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = expected(row, col); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } + + WA = WA_original; + WC = WC_original; + expected_storage = expected_storage_original; + + symmetric_matrix_rank_2k_update(scaled(0.5,A), A, scaled(2., C), C, upper_triangle); + + for (std::size_t row = 0; row < C.extent(0); ++row) { + for (std::size_t col = 0; col < C.extent(1); ++col) { + const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)+C_original(row, col)); + const auto C_rc = C(row, col); + EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; + } + } +#endif + } + +} // end anonymous namespace diff --git a/tests/native/syrk.cpp b/tests/native/syrk.cpp index c1ee95d3..a42a4a80 100644 --- a/tests/native/syrk.cpp +++ b/tests/native/syrk.cpp @@ -55,7 +55,11 @@ namespace { for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } @@ -70,12 +74,18 @@ namespace { for (std::size_t row = 0; row < C.extent(0); ++row) { for (std::size_t col = 0; col < C.extent(1); ++col) { +#if !defined(LINALG_FIX_RANK_UPDATES) + const auto expected_rc = expected(row, col); +#else const auto expected_rc = (expected(row, col)==flag)?flag:(expected(row, col)-C_original(row, col)); +#endif const auto C_rc = C(row, col); EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } +#if defined(LINALG_FIX_RANK_UPDATES) + WA = WA_original; WC = WC_original; expected_storage = expected_storage_original; @@ -103,6 +113,8 @@ namespace { EXPECT_EQ(expected_rc, C_rc) << "at (" << row << ", " << col << ")"; } } +#endif + } } // end anonymous namespace From 4bb1db2fe35fecc5e427c4046d7f396e9c80d23b Mon Sep 17 00:00:00 2001 From: iburyl Date: Wed, 30 Oct 2024 09:56:03 -0700 Subject: [PATCH 10/10] fix in diagonal reading for updating versions of hermitian rank-k and rank-2k --- .../__p1673_bits/blas3_matrix_rank_2k_update.hpp | 2 +- .../__p1673_bits/blas3_matrix_rank_k_update.hpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp b/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp index 0d7bfd60..f5630a34 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_rank_2k_update.hpp @@ -521,8 +521,8 @@ void hermitian_matrix_rank_2k_update( for (size_type j = 0; j < C.extent(1); ++j) { const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; - C(j,j) = impl::real_if_needed(E(j,j)); for (size_type i = i_lower; i < i_upper; ++i) { + C(i,j) = (i==j)?impl::real_if_needed(E(i,j)):E(i,j); for (size_type k = 0; k < A.extent(1); ++k) { C(i,j) += A(i,k) * impl::conj_if_needed(B(j,k)) + B(i,k) * impl::conj_if_needed(A(j,k)); } diff --git a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp index 4179565d..8a40ffa9 100644 --- a/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp +++ b/include/experimental/__p1673_bits/blas3_matrix_rank_k_update.hpp @@ -990,9 +990,8 @@ void hermitian_matrix_rank_k_update( for (size_type j = 0; j < C.extent(1); ++j) { const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; - C(j, j) = impl::real_if_needed(E(j, j)); for (size_type i = i_lower; i < i_upper; ++i) { - C(i, j) = E(i, j); + C(i, j) = (i==j)?impl::real_if_needed(E(i, j)):E(i, j); for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += alpha * A(i, k) * impl::conj_if_needed(A(j, k)); } @@ -1111,11 +1110,10 @@ void hermitian_matrix_rank_k_update( using size_type = std::common_type_t; for (size_type j = 0; j < C.extent(1); ++j) { - C(j, j) = impl::real_if_needed(C(j, j)); const size_type i_lower = lower_tri ? j : size_type(0); const size_type i_upper = lower_tri ? C.extent(0) : j+1; for (size_type i = i_lower; i < i_upper; ++i) { - C(i, j) = E(i, j); + C(i, j) = (i==j)?impl::real_if_needed(E(i, j)):E(i, j); for (size_type k = 0; k < A.extent(1); ++k) { C(i, j) += A(i, k) * impl::conj_if_needed(A(j, k)); }