diff --git a/include/ddc/for_each.hpp b/include/ddc/for_each.hpp index 32c26462a..c1fedfae8 100644 --- a/include/ddc/for_each.hpp +++ b/include/ddc/for_each.hpp @@ -33,6 +33,23 @@ void for_each_serial( } } +template +KOKKOS_FUNCTION void annotated_for_each_serial( + std::array const& begin, + std::array const& end, + Functor const& f, + Is const&... is) noexcept +{ + static constexpr std::size_t I = sizeof...(Is); + if constexpr (I == N) { + f(RetType(is...)); + } else { + for (Element ii = begin[I]; ii < end[I]; ++ii) { + annotated_for_each_serial(begin, end, f, is..., ii); + } + } +} + } // namespace detail /** iterates over a nD domain in serial @@ -46,4 +63,21 @@ void for_each(Support const& domain, Functor&& f) noexcept detail::for_each_serial(domain, size, std::forward(f)); } +/** iterates over a nD domain in serial. Can be called from a device kernel. + * @param[in] domain the domain over which to iterate + * @param[in] f a functor taking an index as parameter + */ +template +KOKKOS_FUNCTION void annotated_for_each( + DiscreteDomain const& domain, + Functor&& f) noexcept +{ + DiscreteElement const ddc_begin = domain.front(); + DiscreteElement const ddc_end = domain.front() + domain.extents(); + std::array const begin = detail::array(ddc_begin); + std::array const end = detail::array(ddc_end); + detail::annotated_for_each_serial< + DiscreteElement>(begin, end, std::forward(f)); +} + } // namespace ddc diff --git a/include/ddc/transform_reduce.hpp b/include/ddc/transform_reduce.hpp index c9253f0f6..ca8b59444 100644 --- a/include/ddc/transform_reduce.hpp +++ b/include/ddc/transform_reduce.hpp @@ -48,6 +48,50 @@ T transform_reduce_serial( DDC_IF_NVCC_THEN_POP } +/** A serial reduction over a nD domain. Can be called from a device kernel. + * @param[in] domain the range over which to apply the algorithm + * @param[in] neutral the neutral element of the reduction operation + * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the + * results of transform, the results of other reduce and neutral. + * @param[in] transform a unary FunctionObject that will be applied to each element of the input + * range. The return type must be acceptable as input to reduce + * @param[in] dcoords discrete elements from dimensions already in a loop + */ +template < + class... DDims, + class T, + class BinaryReductionOp, + class UnaryTransformOp, + class... DCoords> +KOKKOS_FUNCTION T annotated_transform_reduce_serial( + DiscreteDomain const& domain, + [[maybe_unused]] T const neutral, + BinaryReductionOp const& reduce, + UnaryTransformOp const& transform, + DCoords const&... dcoords) noexcept +{ + DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function) + if constexpr (sizeof...(DCoords) == sizeof...(DDims)) { + return transform(DiscreteElement(dcoords...)); + } else { + using CurrentDDim = type_seq_element_t>; + T result = neutral; + for (DiscreteElement const ii : select(domain)) { + result = reduce( + result, + annotated_transform_reduce_serial( + domain, + neutral, + reduce, + transform, + dcoords..., + ii)); + } + return result; + } + DDC_IF_NVCC_THEN_POP +} + } // namespace detail /** A reduction over a nD domain in serial @@ -72,4 +116,26 @@ T transform_reduce( std::forward(transform)); } +/** A reduction over a nD domain in serial. Can be called from a device kernel. + * @param[in] domain the range over which to apply the algorithm + * @param[in] neutral the neutral element of the reduction operation + * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the + * results of transform, the results of other reduce and neutral. + * @param[in] transform a unary FunctionObject that will be applied to each element of the input + * range. The return type must be acceptable as input to reduce + */ +template +KOKKOS_FUNCTION T annotated_transform_reduce( + DiscreteDomain const& domain, + T neutral, + BinaryReductionOp&& reduce, + UnaryTransformOp&& transform) noexcept +{ + return detail::annotated_transform_reduce_serial( + domain, + neutral, + std::forward(reduce), + std::forward(transform)); +} + } // namespace ddc diff --git a/tests/for_each.cpp b/tests/for_each.cpp index aa479b799..5b3b087bb 100644 --- a/tests/for_each.cpp +++ b/tests/for_each.cpp @@ -10,6 +10,8 @@ #include +#include + inline namespace anonymous_namespace_workaround_for_each_cpp { using DElem0D = ddc::DiscreteElement<>; @@ -81,3 +83,71 @@ TEST(ForEachSerialHost, TwoDimensions) ddc::for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; }); EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()); } + +void TestAnnotatedForEachSerialDevice1D(ddc::ChunkSpan< + int, + DDomX, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> view) +{ + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + DDom0D(), + KOKKOS_LAMBDA(DElem0D) { + ddc::annotated_for_each(view.domain(), [=](DElemX const ix) { view(ix) = 1; }); + }); +} + +TEST(AnnotatedForEachSerialDevice, OneDimension) +{ + DDomX const dom(lbound_x, nelems_x); + Kokkos::View const + storage("", dom.size()); + ddc::ChunkSpan< + int, + DDomX, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> const view(storage.data(), dom); + TestAnnotatedForEachSerialDevice1D(view); + EXPECT_EQ( + Kokkos::Experimental:: + count(Kokkos::DefaultExecutionSpace(), + Kokkos::Experimental::begin(storage), + Kokkos::Experimental::end(storage), + 1), + dom.size()); +} + +void TestAnnotatedForEachSerialDevice2D(ddc::ChunkSpan< + int, + DDomXY, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> view) +{ + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + DDom0D(), + KOKKOS_LAMBDA(DElem0D) { + ddc::annotated_for_each(view.domain(), [=](DElemXY const ixy) { view(ixy) = 1; }); + }); +} + +TEST(AnnotatedForEachSerialDevice, TwoDimensions) +{ + DDomXY const dom(lbound_x_y, nelems_x_y); + Kokkos::View const + storage("", dom.size()); + ddc::ChunkSpan< + int, + DDomXY, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> const view(storage.data(), dom); + TestAnnotatedForEachSerialDevice2D(view); + EXPECT_EQ( + Kokkos::Experimental:: + count(Kokkos::DefaultExecutionSpace(), + Kokkos::Experimental::begin(storage), + Kokkos::Experimental::end(storage), + 1), + dom.size()); +} diff --git a/tests/print.cpp b/tests/print.cpp index 82a995da8..0571d50d7 100644 --- a/tests/print.cpp +++ b/tests/print.cpp @@ -267,13 +267,12 @@ void PrintTestMetadata() print_type_info(ss, chunk_span); EXPECT_THAT( ss.str(), - testing::MatchesRegex( - "anonymous_namespace_workaround_print_cpp::Dim0\\(5\\)×" - "anonymous_namespace_workaround_print_cpp::Dim1\\(5\\)\n" - "ddc::ChunkSpan" - ", Kokkos::layout_.+, Kokkos::.+Space>\n")); + testing::MatchesRegex("anonymous_namespace_workaround_print_cpp::Dim0\\(5\\)×" + "anonymous_namespace_workaround_print_cpp::Dim1\\(5\\)\n" + "ddc::ChunkSpan" + ", Kokkos::layout_.+, Kokkos::.+Space>\n")); } } diff --git a/tests/transform_reduce.cpp b/tests/transform_reduce.cpp index 5009bc7b3..0f6c4317a 100644 --- a/tests/transform_reduce.cpp +++ b/tests/transform_reduce.cpp @@ -8,6 +8,8 @@ #include +#include + inline namespace anonymous_namespace_workaround_transform_reduce_cpp { using DElem0D = ddc::DiscreteElement<>; @@ -78,3 +80,39 @@ TEST(TransformReduce, TwoDimensions) ddc::transform_reduce(dom, 0, ddc::reducer::sum(), chunk), dom.size() * (dom.size() - 1) / 2); } + +int TestAnnotatedTransformReduce(ddc::ChunkSpan< + int, + DDomXY, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> chunk) +{ + Kokkos::View const count(""); + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + DDom0D(), + KOKKOS_LAMBDA(DElem0D) { + count() = ddc::annotated_transform_reduce( + chunk.domain(), + 0, + ddc::reducer::sum(), + chunk); + }); + Kokkos::View const count_host + = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), count); + return count_host(); +} + +TEST(AnnotatedTransformReduce, TwoDimensions) +{ + DDomXY const dom(lbound_x_y, nelems_x_y); + Kokkos::View const + storage("", dom.size()); + Kokkos::Experimental::fill(Kokkos::DefaultExecutionSpace(), storage, 1); + ddc::ChunkSpan< + int, + DDomXY, + Kokkos::layout_right, + typename Kokkos::DefaultExecutionSpace::memory_space> const chunk(storage.data(), dom); + EXPECT_EQ(TestAnnotatedTransformReduce(chunk), dom.size()); +} diff --git a/vendor/kokkos b/vendor/kokkos index 0d4a2d38b..175257a51 160000 --- a/vendor/kokkos +++ b/vendor/kokkos @@ -1 +1 @@ -Subproject commit 0d4a2d38ba14db2a741575639b40c31f3232185e +Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 diff --git a/vendor/kokkos-fft b/vendor/kokkos-fft index fad9d20ab..b460f187a 160000 --- a/vendor/kokkos-fft +++ b/vendor/kokkos-fft @@ -1 +1 @@ -Subproject commit fad9d20abe0113d6d215881878ef707ff2dcd5e6 +Subproject commit b460f187a309a392766031e8e231e80e4b9a6c7c diff --git a/vendor/kokkos-kernels b/vendor/kokkos-kernels index 8fddd113d..6e2ba940f 160000 --- a/vendor/kokkos-kernels +++ b/vendor/kokkos-kernels @@ -1 +1 @@ -Subproject commit 8fddd113d9d760e69d24c4a17288c14424299081 +Subproject commit 6e2ba940f5c8efc3ad1d7aca8cf4313073a186de