Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simd Find #6302

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ namespace hpx::parallel::detail {
sequential_find_t<ExPolicy>, Iterator first, Sentinel last,
T const& value, Proj proj = Proj())
{
return util::loop_pred<
std::decay_t<hpx::execution::sequenced_policy>>(
return util::loop_pred<ExPolicy>(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a static_assert verifying that ExPolicy is actually a sequential policy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change was made so that sequential_find_t can accept both sequenced and unsequenced execution policies. Would you rather I use static_assert(is_seq || is_unseq)?

first, last, [&value, &proj](auto const& curr) {
return HPX_INVOKE(proj, *curr) == value;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <type_traits>
#include <utility>

// Please use static assert and enforce Iter to be Random Access Iterator
namespace hpx::parallel::util {
/*
Compiler and Hardware should also support vector operations for IterDiff,
Expand All @@ -42,7 +41,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -64,7 +63,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (IterDiff j = i; j < i + num_blocks; ++j)
{
std::int32_t const t = f(*(first + j));
std::int32_t const t = f(first + j);
simd_lane[j - i] = t;
found_flag |= t;
}
Expand All @@ -88,7 +87,7 @@ namespace hpx::parallel::util {
//Keep remainder scalar
while (i != n)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -108,7 +107,7 @@ namespace hpx::parallel::util {
// clang-format off
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
if (f(*(first1 + i), *(first2 + i)))
if (f(first1 + i, first2 + i))
break;
// clang-format on

Expand All @@ -129,8 +128,8 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (i = 0; i < num_blocks; ++i)
{
IterDiff const t = f(*(first1 + outer_loop_ind + i),
*(first2 + outer_loop_ind + i));
IterDiff const t = f(first1 + outer_loop_ind + i,
first2 + outer_loop_ind + i);
simd_lane[i] = t;
found_flag |= t;
}
Expand All @@ -152,7 +151,7 @@ namespace hpx::parallel::util {

//Keep remainder scalar
for (; outer_loop_ind != n; ++outer_loop_ind)
if (f(*(first1 + outer_loop_ind), *(first2 + outer_loop_ind)))
if (f(first1 + outer_loop_ind, first2 + outer_loop_ind))
break;

return std::make_pair(first1 + outer_loop_ind, first2 + outer_loop_ind);
Expand Down
15 changes: 13 additions & 2 deletions libs/core/algorithms/include/hpx/parallel/util/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

#include <hpx/config.hpp>
#include <hpx/assert.hpp>
#include <hpx/concepts/concepts.hpp>
#include <hpx/datastructures/tuple.hpp>
#include <hpx/execution/traits/is_execution_policy.hpp>
#include <hpx/executors/execution_policy.hpp>
#include <hpx/functional/detail/invoke.hpp>
#include <hpx/functional/detail/tag_fallback_invoke.hpp>
#include <hpx/functional/invoke_result.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/unseq/simd_helpers.hpp>
#include <hpx/type_support/identity.hpp>

#include <algorithm>
Expand Down Expand Up @@ -149,7 +150,6 @@ namespace hpx::parallel::util {

///////////////////////////////////////////////////////////////////////////
namespace detail {

// Helper class to repeatedly call a function starting from a given
// iterator position till the predicate returns true.
template <typename Iterator>
Expand Down Expand Up @@ -185,6 +185,17 @@ namespace hpx::parallel::util {
}
};

// tag_invoke overload for loop_pred_t<ExPolicy>.
//
// Participates in overload resolution only when Begin is a random access
// iterator and ExPolicy is an unsequenced execution policy (see the
// HPX_CONCEPT_REQUIRES_ condition below). It forwards the range
// [begin, end) to unseq_first_n as a start iterator plus an element count
// and returns whatever unseq_first_n returns, as a Begin.
//
// NOTE(review): pred is presumably invoked by unseq_first_n with an
// iterator rather than a dereferenced value -- confirm against
// unseq_first_n.
// NOTE(review): HPX_RESTRICT is applied to the generic Begin/End
// parameters; confirm this is well-formed when they are class-type
// iterators and not raw pointers.
template <typename Begin, typename End, typename Pred, typename ExPolicy,
HPX_CONCEPT_REQUIRES_(hpx::traits::is_random_access_iterator_v<Begin>&&
hpx::is_unsequenced_execution_policy_v<ExPolicy>)>
HPX_HOST_DEVICE HPX_FORCEINLINE Begin tag_invoke(
hpx::parallel::util::loop_pred_t<ExPolicy>, Begin HPX_RESTRICT begin,
End HPX_RESTRICT end, Pred&& pred)
{
// The element count is computed with std::distance(begin, end).
return unseq_first_n(
begin, std::distance(begin, end), HPX_FORWARD(Pred, pred));
}

#if !defined(HPX_COMPUTE_DEVICE_CODE)
// Variable template providing a value-initialized, constexpr instance of
// loop_pred_t<ExPolicy> for each execution policy type. This definition is
// only compiled when HPX_COMPUTE_DEVICE_CODE is not defined.
template <typename ExPolicy>
inline constexpr loop_pred_t<ExPolicy> loop_pred = loop_pred_t<ExPolicy>{};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
{
first_index = first_index % length;

std::vector<T> v(length, static_cast<T>(false));
std::vector<T> v(length);
std::size_t i = 0;

std::for_each(v.begin(), v.end(), [&](T& t) {
Expand All @@ -36,7 +36,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
i++;
});

auto f = [](T t) { return t; };
auto f = [](auto t) { return *t; };

auto iter_test = hpx::parallel::util::unseq_first_n(
v.begin(), static_cast<T>(length), f);
Expand Down Expand Up @@ -80,7 +80,7 @@ void test_unseq_first_n2_dispatch2(std::size_t length, std::size_t first_index)
idx++;
}

auto f = [](T t1, T t2) { return t1 && t2; };
auto f = [](auto t1, auto t2) { return *t1 && *t2; };

auto iter_pair_test = hpx::parallel::util::unseq2_first_n(
v1.begin(), v2.begin(), static_cast<T>(length), f);
Expand Down
Loading