Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a8a4cbc
init
blegouix Dec 15, 2024
2ce680e
wip
blegouix Dec 15, 2024
00469c3
wip
blegouix Dec 15, 2024
81d8c9b
wip
blegouix Dec 15, 2024
4b83f7c
wip
blegouix Dec 15, 2024
f6d53fc
wip
blegouix Dec 15, 2024
df3da08
fix
blegouix Dec 15, 2024
98866b4
Apply suggestions from code review
blegouix Dec 15, 2024
d1f17f6
Apply suggestions from code review
blegouix Dec 15, 2024
f85b51a
Apply suggestions from code review
blegouix Dec 15, 2024
63b15d8
Apply suggestions from code review
blegouix Dec 15, 2024
8edf5fd
wip
blegouix Dec 15, 2024
95ba907
Update for_each.hpp
blegouix Dec 15, 2024
ef388cd
Update transform_reduce.hpp
blegouix Dec 15, 2024
102f0a9
Update for_each.cpp
blegouix Dec 15, 2024
b8a31a5
Update transform_reduce.cpp
blegouix Dec 15, 2024
b1d7075
clang-format
blegouix Dec 15, 2024
fa63605
wip
blegouix Dec 16, 2024
be9d263
doc
blegouix Dec 16, 2024
bbc693f
doc
blegouix Dec 16, 2024
ce927d9
1D test with for_each
blegouix Dec 17, 2024
f73122f
nonsense fix
blegouix Dec 30, 2024
5a2b798
Apply suggestions from code review
blegouix Dec 30, 2024
615f922
clang-format
blegouix Dec 30, 2024
6a3625e
HIPCC_COMPATIBLE_MAYBE_UNUSED
blegouix Dec 30, 2024
382d5b1
doc
blegouix Dec 30, 2024
6c37f29
comment the issue with nvcc
blegouix Dec 30, 2024
73ee089
fix attempt for hipcc
blegouix Dec 30, 2024
484f996
remove static_cast<int>
blegouix Jan 2, 2025
40a1dba
Merge branch 'main' into annotated_for_each
blegouix Jan 9, 2025
9bf07fc
Merge branch 'main' into annotated_for_each
blegouix Jan 11, 2025
ff64e39
Merge branch 'main' into annotated_for_each
blegouix Jan 31, 2025
2b6b686
Merge branch 'main' into annotated_for_each
blegouix Aug 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions include/ddc/for_each.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ void for_each_serial(
}
}

/** Recursive serial traversal of the nD index box [begin, end), one dimension per recursion level.
 * Each level loops over the dimension whose position equals the number of indices already
 * collected; once N indices are collected, `f(RetType(is...))` is invoked.
 * @tparam RetType type constructed from the collected indices and passed to f
 * @param[in] begin per-dimension first index of the iteration
 * @param[in] end per-dimension one-past-the-last index of the iteration
 * @param[in] f functor called once per index tuple
 * @param[in] is indices already fixed by the enclosing recursion levels
 */
template <class RetType, class Element, std::size_t N, class Functor, class... Is>
KOKKOS_FUNCTION void annotated_for_each_serial(
        std::array<Element, N> const& begin,
        std::array<Element, N> const& end,
        Functor const& f,
        Is const&... is) noexcept
{
    // Number of indices gathered so far, i.e. the position of the dimension to loop over next.
    constexpr std::size_t depth = sizeof...(Is);
    if constexpr (depth != N) {
        Element const stop = end[depth];
        for (Element idx = begin[depth]; idx < stop; ++idx) {
            annotated_for_each_serial<RetType>(begin, end, f, is..., idx);
        }
    } else {
        // Every dimension has an index: build the element and hand it to the functor.
        f(RetType(is...));
    }
}

} // namespace detail

/** iterates over a nD domain in serial
Expand All @@ -46,4 +63,21 @@ void for_each(Support const& domain, Functor&& f) noexcept
detail::for_each_serial(domain, size, std::forward<Functor>(f));
}

/** Serial iteration over every element of a nD domain. Can be called from a device kernel.
 * @param[in] domain the domain over which to iterate
 * @param[in] f a functor taking an index as parameter
 */
template <class... DDims, class Functor>
KOKKOS_FUNCTION void annotated_for_each(
        DiscreteDomain<DDims...> const& domain,
        Functor&& f) noexcept
{
    using element_type = DiscreteElement<DDims...>;
    element_type const first = domain.front();
    // One-past-the-last element along every dimension.
    element_type const past_the_end = first + domain.extents();
    detail::annotated_for_each_serial<element_type>(
            detail::array(first),
            detail::array(past_the_end),
            std::forward<Functor>(f));
}

} // namespace ddc
66 changes: 66 additions & 0 deletions include/ddc/transform_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,50 @@ T transform_reduce_serial(
DDC_IF_NVCC_THEN_POP
}

/** Serial reduction over a nD domain, recursing over one dimension per call.
 * Can be called from a device kernel.
 * @param[in] domain the range over which to apply the algorithm
 * @param[in] neutral the neutral element of the reduction operation
 * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
 *            results of transform, the results of other reduce and neutral.
 * @param[in] transform a unary FunctionObject that will be applied to each element of the input
 *            range. The return type must be acceptable as input to reduce
 * @param[in] dcoords discrete elements from dimensions already in a loop
 * @return the reduction over the dimensions not yet fixed by dcoords
 */
template <
        class... DDims,
        class T,
        class BinaryReductionOp,
        class UnaryTransformOp,
        class... DCoords>
KOKKOS_FUNCTION T annotated_transform_reduce_serial(
        DiscreteDomain<DDims...> const& domain,
        [[maybe_unused]] T const neutral,
        BinaryReductionOp const& reduce,
        UnaryTransformOp const& transform,
        DCoords const&... dcoords) noexcept
{
    DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function)
    if constexpr (sizeof...(DCoords) != sizeof...(DDims)) {
        // Some dimensions are still free: loop over the next one and fold the inner reductions.
        using LoopDDim = type_seq_element_t<sizeof...(DCoords), detail::TypeSeq<DDims...>>;
        T accumulated = neutral;
        for (DiscreteElement<LoopDDim> const elem : select<LoopDDim>(domain)) {
            accumulated = reduce(
                    accumulated,
                    annotated_transform_reduce_serial(
                            domain,
                            neutral,
                            reduce,
                            transform,
                            dcoords...,
                            elem));
        }
        return accumulated;
    } else {
        // One coordinate per dimension: evaluate transform on the assembled element.
        return transform(DiscreteElement<DDims...>(dcoords...));
    }
    DDC_IF_NVCC_THEN_POP
}

} // namespace detail

/** A reduction over a nD domain in serial
Expand All @@ -72,4 +116,26 @@ T transform_reduce(
std::forward<UnaryTransformOp>(transform));
}

/** Serial reduction over a nD domain. Can be called from a device kernel.
 * @param[in] domain the range over which to apply the algorithm
 * @param[in] neutral the neutral element of the reduction operation
 * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
 *            results of transform, the results of other reduce and neutral.
 * @param[in] transform a unary FunctionObject that will be applied to each element of the input
 *            range. The return type must be acceptable as input to reduce
 * @return the value produced by detail::annotated_transform_reduce_serial for the whole domain
 */
template <class... DDims, class T, class BinaryReductionOp, class UnaryTransformOp>
KOKKOS_FUNCTION T annotated_transform_reduce(
        DiscreteDomain<DDims...> const& domain,
        T neutral,
        BinaryReductionOp&& reduce,
        UnaryTransformOp&& transform) noexcept
{
    // Start the recursion with no coordinate fixed yet.
    T const reduced = detail::annotated_transform_reduce_serial(
            domain,
            neutral,
            std::forward<BinaryReductionOp>(reduce),
            std::forward<UnaryTransformOp>(transform));
    return reduced;
}

} // namespace ddc
70 changes: 70 additions & 0 deletions tests/for_each.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include <gtest/gtest.h>

#include <Kokkos_StdAlgorithms.hpp>

inline namespace anonymous_namespace_workaround_for_each_cpp {

using DElem0D = ddc::DiscreteElement<>;
Expand Down Expand Up @@ -81,3 +83,71 @@ TEST(ForEachSerialHost, TwoDimensions)
ddc::for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; });
EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size());
}

// Launches a single-iteration kernel (0D domain) on the default execution space; inside it,
// ddc::annotated_for_each writes 1 into every element of the 1D `view`.
// NOTE(review): presumably a free function rather than inline in the TEST body because of
// device-lambda restrictions - confirm.
void TestAnnotatedForEachSerialDevice1D(ddc::ChunkSpan<
                                        int,
                                        DDomX,
                                        Kokkos::layout_right,
                                        typename Kokkos::DefaultExecutionSpace::memory_space> view)
{
    auto const exec_space = Kokkos::DefaultExecutionSpace();
    ddc::parallel_for_each(
            exec_space,
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                auto const set_to_one = [=](DElemX const ix) { view(ix) = 1; };
                ddc::annotated_for_each(view.domain(), set_to_one);
            });
}

// Checks that annotated_for_each, run from a kernel, sets every element of a 1D chunk to 1.
TEST(AnnotatedForEachSerialDevice, OneDimension)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan
            = ddc::ChunkSpan<int, DDomX, Kokkos::layout_right, ExecSpace::memory_space>;

    DDomX const dom(lbound_x, nelems_x);
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    DeviceSpan const view(storage.data(), dom);

    TestAnnotatedForEachSerialDevice1D(view);

    // Count the entries equal to 1 in the backing storage.
    auto const nb_ones = Kokkos::Experimental::
            count(ExecSpace(),
                  Kokkos::Experimental::begin(storage),
                  Kokkos::Experimental::end(storage),
                  1);
    EXPECT_EQ(nb_ones, dom.size());
}

// Launches a single-iteration kernel (0D domain) on the default execution space; inside it,
// ddc::annotated_for_each writes 1 into every element of the 2D `view`.
// NOTE(review): presumably a free function rather than inline in the TEST body because of
// device-lambda restrictions - confirm.
void TestAnnotatedForEachSerialDevice2D(ddc::ChunkSpan<
                                        int,
                                        DDomXY,
                                        Kokkos::layout_right,
                                        typename Kokkos::DefaultExecutionSpace::memory_space> view)
{
    auto const exec_space = Kokkos::DefaultExecutionSpace();
    ddc::parallel_for_each(
            exec_space,
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                auto const set_to_one = [=](DElemXY const ixy) { view(ixy) = 1; };
                ddc::annotated_for_each(view.domain(), set_to_one);
            });
}

// Checks that annotated_for_each, run from a kernel, sets every element of a 2D chunk to 1.
TEST(AnnotatedForEachSerialDevice, TwoDimensions)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan
            = ddc::ChunkSpan<int, DDomXY, Kokkos::layout_right, ExecSpace::memory_space>;

    DDomXY const dom(lbound_x_y, nelems_x_y);
    // Flat storage holding dom.size() entries, viewed as a 2D chunk below.
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    DeviceSpan const view(storage.data(), dom);

    TestAnnotatedForEachSerialDevice2D(view);

    // Count the entries equal to 1 in the backing storage.
    auto const nb_ones = Kokkos::Experimental::
            count(ExecSpace(),
                  Kokkos::Experimental::begin(storage),
                  Kokkos::Experimental::end(storage),
                  1);
    EXPECT_EQ(nb_ones, dom.size());
}
13 changes: 6 additions & 7 deletions tests/print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,12 @@ void PrintTestMetadata()
print_type_info(ss, chunk_span);
EXPECT_THAT(
ss.str(),
testing::MatchesRegex(
"anonymous_namespace_workaround_print_cpp::Dim0\\(5\\)×"
"anonymous_namespace_workaround_print_cpp::Dim1\\(5\\)\n"
"ddc::ChunkSpan<double, ddc::DiscreteDomain"
"<anonymous_namespace_workaround_print_cpp::Dim0,"
" anonymous_namespace_workaround_print_cpp::Dim1>"
", Kokkos::layout_.+, Kokkos::.+Space>\n"));
testing::MatchesRegex("anonymous_namespace_workaround_print_cpp::Dim0\\(5\\)×"
"anonymous_namespace_workaround_print_cpp::Dim1\\(5\\)\n"
"ddc::ChunkSpan<double, ddc::DiscreteDomain"
"<anonymous_namespace_workaround_print_cpp::Dim0,"
" anonymous_namespace_workaround_print_cpp::Dim1>"
", Kokkos::layout_.+, Kokkos::.+Space>\n"));
}
}

Expand Down
38 changes: 38 additions & 0 deletions tests/transform_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <gtest/gtest.h>

#include <Kokkos_StdAlgorithms.hpp>

inline namespace anonymous_namespace_workaround_transform_reduce_cpp {

using DElem0D = ddc::DiscreteElement<>;
Expand Down Expand Up @@ -78,3 +80,39 @@ TEST(TransformReduce, TwoDimensions)
ddc::transform_reduce(dom, 0, ddc::reducer::sum<int>(), chunk),
dom.size() * (dom.size() - 1) / 2);
}

// Runs ddc::annotated_transform_reduce (sum of the chunk values) inside a single-iteration
// kernel, stores the result in a rank-0 view, and returns it after copying to the host.
int TestAnnotatedTransformReduce(ddc::ChunkSpan<
                                 int,
                                 DDomXY,
                                 Kokkos::layout_right,
                                 typename Kokkos::DefaultExecutionSpace::memory_space> chunk)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using HostSpace = Kokkos::DefaultHostExecutionSpace;

    Kokkos::View<int, Kokkos::LayoutRight, ExecSpace> const sum_device("");
    ddc::parallel_for_each(
            ExecSpace(),
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                sum_device() = ddc::annotated_transform_reduce(
                        chunk.domain(),
                        0,
                        ddc::reducer::sum<int>(),
                        chunk);
            });
    // Bring the scalar result back to host memory before reading it.
    Kokkos::View<int, Kokkos::LayoutRight, HostSpace> const sum_host
            = Kokkos::create_mirror_view_and_copy(HostSpace(), sum_device);
    return sum_host();
}

// Fills a 2D chunk with ones and checks that the in-kernel sum equals the number of elements.
TEST(AnnotatedTransformReduce, TwoDimensions)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan
            = ddc::ChunkSpan<int, DDomXY, Kokkos::layout_right, ExecSpace::memory_space>;

    DDomXY const dom(lbound_x_y, nelems_x_y);
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    Kokkos::Experimental::fill(ExecSpace(), storage, 1);
    DeviceSpan const chunk(storage.data(), dom);

    EXPECT_EQ(TestAnnotatedTransformReduce(chunk), dom.size());
}
2 changes: 1 addition & 1 deletion vendor/kokkos
Submodule kokkos updated 420 files
2 changes: 1 addition & 1 deletion vendor/kokkos-fft
Submodule kokkos-fft updated 193 files
2 changes: 1 addition & 1 deletion vendor/kokkos-kernels
Submodule kokkos-kernels updated 334 files