From ac23395b582a5f88926fbd68bc00efed08c3e0d1 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Fri, 3 Jan 2025 14:29:51 -0600 Subject: [PATCH] Minor fixes and optimizations --- CMakeLists.txt | 3 + .../hpx/parallel/algorithms/for_loop.hpp | 129 +++++++++++------- .../algorithms/for_loop_induction.hpp | 57 ++++---- .../algorithms/for_loop_reduction.hpp | 31 +++-- .../hpx/parallel/datapar/iterator_helpers.hpp | 6 +- .../detail/eve/vector_pack_load_store.hpp | 8 +- .../detail/simd/vector_pack_load_store.hpp | 10 +- .../detail/vc/vector_pack_load_store.hpp | 26 ++-- .../traits/detail/vc/vector_pack_type.hpp | 4 +- .../executors/detail/index_queue_spawning.hpp | 36 +++-- .../iterator_support/counting_iterator.hpp | 17 ++- .../hpx/iterator_support/iterator_adaptor.hpp | 10 +- .../hpx/iterator_support/iterator_facade.hpp | 69 ++++++++-- .../iterator_support/traits/is_iterator.hpp | 3 +- .../src/runtime_configuration.cpp | 2 +- .../hpx/threading_base/thread_num_tss.hpp | 45 +++--- .../threading_base/src/thread_num_tss.cpp | 83 ++++++----- 17 files changed, 337 insertions(+), 202 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 943d75d0bc52..519c32921524 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1894,6 +1894,9 @@ if(WIN32) # Silence C++20 deprecation warnings hpx_add_config_cond_define(_SILENCE_ALL_CXX20_DEPRECATION_WARNINGS) + # Silence C++23 deprecation warnings + hpx_add_config_cond_define(_SILENCE_ALL_CXX23_DEPRECATION_WARNINGS) + # ASan is available in Visual Studion starting V16.8 if((MSVC_VERSION GREATER_EQUAL 1928) AND HPX_WITH_SANITIZERS) hpx_add_target_compile_option( diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop.hpp index bcb1b62c2b4a..fffdf62124f1 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 
Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // Copyright (c) 2016 Thomas Heller // // SPDX-License-Identifier: BSL-1.0 @@ -740,6 +740,7 @@ namespace hpx { namespace experimental { #include #include #include +#include #include #include #include @@ -755,6 +756,7 @@ namespace hpx { namespace experimental { #include #include #include +#include #include #include #include @@ -771,35 +773,39 @@ namespace hpx::parallel { /// \cond NOINTERNAL + /////////////////////////////////////////////////////////////////////// + HPX_HAS_XXX_TRAIT_DEF(needs_current_thread_num); + /////////////////////////////////////////////////////////////////////// template - HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void init_iteration( - hpx::tuple& args, hpx::util::index_pack, - std::size_t part_index) noexcept + HPX_HOST_DEVICE constexpr void init_iteration(hpx::tuple& args, + hpx::util::index_pack, std::size_t part_index, + std::size_t current_thread) noexcept { - (hpx::get(args).init_iteration(part_index), ...); + (hpx::get(args).init_iteration(part_index, current_thread), + ...); } template HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void invoke_iteration( hpx::tuple& args, hpx::util::index_pack, F&& f, - B part_begin) + B part_begin, std::size_t current_thread) { HPX_INVOKE(HPX_FORWARD(F, f), part_begin, - hpx::get(args).iteration_value()...); + hpx::get(args).iteration_value(current_thread)...); } template HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration( - hpx::tuple& args, hpx::util::index_pack) noexcept + hpx::tuple& args, hpx::util::index_pack, + std::size_t current_thread) noexcept { - (hpx::get(args).next_iteration(), ...); + (hpx::get(args).next_iteration(current_thread), ...); } template - HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void exit_iteration( - hpx::tuple& args, hpx::util::index_pack, - std::size_t size) noexcept + HPX_HOST_DEVICE constexpr void exit_iteration(hpx::tuple& args, + hpx::util::index_pack, std::size_t size) noexcept { 
(hpx::get(args).exit_iteration(size), ...); } @@ -832,53 +838,65 @@ namespace hpx::parallel { HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void operator()( B part_begin, std::size_t part_steps, std::size_t part_index) { + std::size_t current_thread = -1; + if constexpr (hpx::util::any_of_v...>>) + { + current_thread = hpx::get_worker_thread_num(); + } + auto pack = hpx::util::make_index_pack_t(); - detail::init_iteration(args_, pack, part_index); + detail::init_iteration(args_, pack, part_index, current_thread); if (stride_ == 1) { while (part_steps-- != 0) { - detail::invoke_iteration(args_, pack, f_, part_begin++); - detail::next_iteration(args_, pack); + detail::invoke_iteration( + args_, pack, f_, part_begin++, current_thread); + detail::next_iteration(args_, pack, current_thread); } } else if (stride_ > 0) { while (part_steps >= static_cast(stride_)) { - detail::invoke_iteration(args_, pack, f_, part_begin); + detail::invoke_iteration( + args_, pack, f_, part_begin, current_thread); part_begin = parallel::detail::next(part_begin, stride_); part_steps -= stride_; - detail::next_iteration(args_, pack); + detail::next_iteration(args_, pack, current_thread); } if (part_steps != 0) { - detail::invoke_iteration(args_, pack, f_, part_begin); - detail::next_iteration(args_, pack); + detail::invoke_iteration( + args_, pack, f_, part_begin, current_thread); + detail::next_iteration(args_, pack, current_thread); } } else { while (part_steps >= static_cast(-stride_)) { - detail::invoke_iteration(args_, pack, f_, part_begin); + detail::invoke_iteration( + args_, pack, f_, part_begin, current_thread); part_begin = parallel::detail::next(part_begin, stride_); part_steps += stride_; - detail::next_iteration(args_, pack); + detail::next_iteration(args_, pack, current_thread); } if (part_steps != 0) { - detail::invoke_iteration(args_, pack, f_, part_begin); - detail::next_iteration(args_, pack); + detail::invoke_iteration( + args_, pack, f_, part_begin, current_thread); + 
detail::next_iteration(args_, pack, current_thread); } } } @@ -1004,15 +1022,17 @@ namespace hpx::parallel { template HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void loop_iter( - B part_begin, E part_end, std::size_t part_index) + B part_begin, E part_end, std::size_t part_index, + std::uint32_t current_thread) { auto pack = hpx::util::make_index_pack_t(); - detail::init_iteration(args_, pack, part_index); + detail::init_iteration(args_, pack, part_index, current_thread); while (part_begin != part_end) { - detail::invoke_iteration(args_, pack, f_, part_begin++); - detail::next_iteration(args_, pack); + detail::invoke_iteration( + args_, pack, f_, part_begin++, current_thread); + detail::next_iteration(args_, pack, current_thread); } } @@ -1021,27 +1041,36 @@ namespace hpx::parallel { B part_begin, std::size_t part_steps, std::size_t part_index = 0) { + std::size_t current_thread = -1; + if constexpr (hpx::util::any_of_v...>>) + { + current_thread = hpx::get_worker_thread_num(); + } + if constexpr (hpx::traits::is_range_generator_v) { auto g = hpx::util::iterate(part_begin); - loop_iter( - hpx::util::begin(g), hpx::util::end(g), part_index); + loop_iter(hpx::util::begin(g), hpx::util::end(g), + part_index, current_thread); } else if constexpr (hpx::traits::is_range_v) { loop_iter(hpx::util::begin(part_begin), - hpx::util::end(part_begin), part_index); + hpx::util::end(part_begin), part_index, current_thread); } else { auto pack = hpx::util::make_index_pack_t(); - detail::init_iteration(args_, pack, part_index); + detail::init_iteration( + args_, pack, part_index, current_thread); - while (part_steps-- != 0) - { - detail::invoke_iteration(args_, pack, f_, part_begin++); - detail::next_iteration(args_, pack); - } + parallel::util::loop_n>( + part_begin, part_steps, [&](auto it) { + detail::invoke_iteration( + args_, pack, f_, it, current_thread); + detail::next_iteration(args_, pack, current_thread); + }); } } @@ -1284,8 +1313,15 @@ namespace hpx::parallel { 
ExPolicy&&, InIter first, Size size, S stride, F&& f, Arg&& arg, Args&&... args) { - arg.init_iteration(0); - (args.init_iteration(0), ...); + std::size_t current_thread = -1; + if constexpr (hpx::util::any_of_v...>>) + { + current_thread = hpx::get_worker_thread_num(); + } + + arg.init_iteration(0, current_thread); + (args.init_iteration(0, current_thread), ...); std::size_t count = size; if (stride > 0) @@ -1298,29 +1334,30 @@ namespace hpx::parallel { first = parallel::detail::next(first, stride); count -= stride; - arg.next_iteration(); - (args.next_iteration(), ...); + arg.next_iteration(current_thread); + (args.next_iteration(current_thread), ...); } } else { while (count >= static_cast(-stride)) { - HPX_INVOKE(f, first, arg.iteration_value(), - args.iteration_value()...); + HPX_INVOKE(f, first, + arg.iteration_value(current_thread), + args.iteration_value(current_thread)...); first = parallel::detail::next(first, stride); count += stride; - arg.next_iteration(); - (args.next_iteration(), ...); + arg.next_iteration(current_thread); + (args.next_iteration(current_thread), ...); } } if (count != 0) { - HPX_INVOKE(f, first, arg.iteration_value(), - args.iteration_value()...); + HPX_INVOKE(f, first, arg.iteration_value(current_thread), + args.iteration_value(current_thread)...); } // make sure live-out variables are properly set on return diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_induction.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_induction.hpp index 9fa50d3ce26f..db4d1addfb21 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_induction.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_induction.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -32,19 +32,20 @@ namespace hpx::parallel::detail { } HPX_HOST_DEVICE - constexpr void init_iteration(std::size_t index) noexcept + constexpr void init_iteration( + std::size_t index, std::size_t /*current_thread*/) noexcept { curr_ = parallel::detail::next(var_, index); } - HPX_HOST_DEVICE - constexpr T const& iteration_value() const noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr T const& iteration_value( + std::size_t /*current_thread*/) const noexcept { return curr_; } - HPX_HOST_DEVICE - constexpr void next_iteration() noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration( + std::size_t /*current_thread*/) noexcept { ++curr_; } @@ -68,19 +69,20 @@ namespace hpx::parallel::detail { } HPX_HOST_DEVICE - constexpr void init_iteration(std::size_t index) noexcept + constexpr void init_iteration( + std::size_t index, std::size_t /*current_thread*/) noexcept { curr_ = parallel::detail::next(var_, index); } - HPX_HOST_DEVICE - constexpr T const& iteration_value() const noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr T const& iteration_value( + std::size_t /*current_thread*/) const noexcept { return curr_; } - HPX_HOST_DEVICE - constexpr void next_iteration() noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration( + std::size_t /*current_thread*/) noexcept { ++curr_; } @@ -109,19 +111,20 @@ namespace hpx::parallel::detail { } HPX_HOST_DEVICE - constexpr void init_iteration(std::size_t index) noexcept + constexpr void init_iteration( + std::size_t index, std::size_t /*current_thread*/) noexcept { curr_ = parallel::detail::next(var_, stride_ * index); } - HPX_HOST_DEVICE - constexpr T const& iteration_value() const noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr T const& iteration_value( + std::size_t /*current_thread*/) const noexcept { return curr_; } - HPX_HOST_DEVICE - constexpr void next_iteration() noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration( + std::size_t 
/*current_thread*/) noexcept { curr_ = parallel::detail::next(curr_, stride_); } @@ -147,19 +150,20 @@ namespace hpx::parallel::detail { } HPX_HOST_DEVICE - constexpr void init_iteration(std::size_t index) noexcept + constexpr void init_iteration( + std::size_t index, std::size_t /*current_thread*/) noexcept { curr_ = parallel::detail::next(var_, stride_ * index); } - HPX_HOST_DEVICE - constexpr T const& iteration_value() const noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr T const& iteration_value( + std::size_t /*current_thread*/) const noexcept { return curr_; } - HPX_HOST_DEVICE - constexpr void next_iteration() noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration( + std::size_t /*current_thread*/) noexcept { curr_ = parallel::detail::next(curr_, stride_); } @@ -184,8 +188,8 @@ namespace hpx::parallel::detail { namespace hpx::experimental { /// The function template returns an induction object of unspecified type - /// having a value type and encapsulating an initial value \a value of that - /// type and, optionally, a stride. + /// having a value type and encapsulating an initial \a value of that type + /// and, optionally, a stride. /// /// For each element in the input range, a looping algorithm over input /// sequence \a S computes an induction value from an induction variable and @@ -207,10 +211,9 @@ namespace hpx::experimental { /// object (default: 1) /// /// \returns This returns an induction object with value type \a T, initial - /// value \a value, and (if specified) stride \a stride. If \a T is - /// an lvalue of non-const type, \a value is used as the live-out - /// object for the induction object; otherwise there is no live-out - /// object. + /// \a value, and (if specified) \a stride. If \a T is a lvalue of + /// non-const type, \a value is used as the live-out object for the + /// induction object; otherwise there is no live-out object. 
/// template HPX_FORCEINLINE constexpr hpx::parallel::detail::induction_stride_helper diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_reduction.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_reduction.hpp index d9b38d75ed0f..cb4490d7d7e5 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_reduction.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/for_loop_reduction.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -15,7 +15,6 @@ #include #include #include -#include #if !defined(HPX_HAVE_CXX17_SHARED_PTR_ARRAY) #include @@ -35,6 +34,8 @@ namespace hpx::parallel::detail { template struct reduction_helper { + using needs_current_thread_num = void; + template constexpr reduction_helper(T& var, T const& identity, Op_&& op) : var_(var) @@ -42,30 +43,44 @@ namespace hpx::parallel::detail { { std::size_t const cores = hpx::parallel::execution::detail::get_os_thread_count(); +#if defined(HPX_HAVE_CXX17_SHARED_PTR_ARRAY) + data_ = std::make_shared[]>(cores); +#else data_.reset(new hpx::util::cache_line_data[cores]); +#endif for (std::size_t i = 0; i != cores; ++i) + { data_[i].data_ = identity; + } } - static constexpr void init_iteration(std::size_t) + HPX_HOST_DEVICE static constexpr void init_iteration( + std::size_t /*index*/, + [[maybe_unused]] std::size_t current_thread) noexcept { - HPX_ASSERT(hpx::get_worker_thread_num() < + HPX_ASSERT(current_thread < hpx::parallel::execution::detail::get_os_thread_count()); } - constexpr T& iteration_value() + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr T& iteration_value( + std::size_t current_thread) noexcept { - return data_[hpx::get_worker_thread_num()].data_; + return data_[current_thread].data_; } - static constexpr void next_iteration() noexcept {} + HPX_HOST_DEVICE 
HPX_FORCEINLINE static constexpr void next_iteration( + std::size_t /*current_thread*/) noexcept + { + } - void exit_iteration(std::size_t /*index*/) + HPX_HOST_DEVICE void exit_iteration(std::size_t /*index*/) { std::size_t const cores = hpx::parallel::execution::detail::get_os_thread_count(); for (std::size_t i = 0; i != cores; ++i) + { var_ = op_(var_, data_[i].data_); + } } private: diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp index 8292b8d0c8a4..2cff8d0e8136 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -30,7 +30,7 @@ namespace hpx::parallel::util::detail { template struct is_data_aligned_impl { - static HPX_FORCEINLINE bool call(Iter const& it) noexcept + static HPX_FORCEINLINE bool call(Iter& it) noexcept { using value_type = typename std::iterator_traits::value_type; using pack_type = traits::vector_pack_type_t; @@ -42,7 +42,7 @@ namespace hpx::parallel::util::detail { }; template - HPX_FORCEINLINE bool is_data_aligned(Iter const& it) noexcept + HPX_FORCEINLINE bool is_data_aligned(Iter& it) noexcept { return is_data_aligned_impl::call(it); } diff --git a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp index 0aa13869fb05..ff33cd9fe501 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp @@ -23,14 +23,14 @@ namespace hpx::parallel::traits { struct 
vector_pack_load { template - HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter const& iter) + HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter& iter) { return V( eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); } template - HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter const& iter) + HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter& iter) { return *iter; } @@ -42,7 +42,7 @@ namespace hpx::parallel::traits { { template HPX_HOST_DEVICE HPX_FORCEINLINE static void aligned( - V& value, Iter const& iter) + V& value, Iter& iter) { eve::store(value, eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); @@ -50,7 +50,7 @@ namespace hpx::parallel::traits { template HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( - V& value, Iter const& iter) + V& value, Iter& iter) { *iter = value; return; diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp index 8a23d078761f..c99347f17471 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2021 Srinivas Yadav -// Copyright (c) 2016-2017 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -25,14 +25,14 @@ namespace hpx::parallel::traits { struct vector_pack_load { template - HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter const& iter) + HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter& iter) { return V( std::addressof(*iter), datapar::experimental::vector_aligned); } template - HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter const& iter) + HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter& iter) { return *iter; } @@ -44,7 +44,7 @@ namespace hpx::parallel::traits { { template HPX_HOST_DEVICE HPX_FORCEINLINE static void aligned( - V& value, Iter const& iter) + V& value, Iter& iter) { value.copy_to( std::addressof(*iter), datapar::experimental::vector_aligned); @@ -52,7 +52,7 @@ namespace hpx::parallel::traits { template HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( - V& value, Iter const& iter) + V& value, Iter& iter) { *iter = value; } diff --git a/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_load_store.hpp index 4e16ac02e15a..710f2df86ff0 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_load_store.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // Copyright (c) 2016 Matthias Kretz // // SPDX-License-Identifier: BSL-1.0 @@ -69,13 +69,13 @@ namespace hpx::parallel::traits { using value_type = typename rebind_pack::type; template - static value_type aligned(Iter const& iter) + static value_type aligned(Iter& iter) { return value_type(std::addressof(*iter), Vc::Aligned); } template - static value_type unaligned(Iter const& iter) + static value_type unaligned(Iter& iter) { return value_type(std::addressof(*iter), Vc::Unaligned); } @@ -87,13 +87,13 @@ namespace hpx::parallel::traits { using value_type = typename 
rebind_pack>::type; template - static value_type aligned(Iter const& iter) + static value_type aligned(Iter& iter) { return *iter; } template - static value_type unaligned(Iter const& iter) + static value_type unaligned(Iter& iter) { return *iter; } @@ -107,13 +107,13 @@ namespace hpx::parallel::traits { typename rebind_pack>::type; template - static value_type aligned(Iter const& iter) + static value_type aligned(Iter& iter) { return *iter; } template - static value_type unaligned(Iter const& iter) + static value_type unaligned(Iter& iter) { return *iter; } @@ -124,13 +124,13 @@ namespace hpx::parallel::traits { struct vector_pack_store { template - static void aligned(V const& value, Iter const& iter) + static void aligned(V& value, Iter& iter) { value.store(std::addressof(*iter), Vc::Aligned); } template - static void unaligned(V const& value, Iter const& iter) + static void unaligned(V& value, Iter& iter) { value.store(std::addressof(*iter), Vc::Unaligned); } @@ -140,13 +140,13 @@ namespace hpx::parallel::traits { struct vector_pack_store> { template - static void aligned(V const& value, Iter const& iter) + static void aligned(V& value, Iter& iter) { *iter = value; } template - static void unaligned(V const& value, Iter const& iter) + static void unaligned(V& value, Iter& iter) { *iter = value; } @@ -157,13 +157,13 @@ namespace hpx::parallel::traits { struct vector_pack_store> { template - static void aligned(Value const& value, Iter const& iter) + static void aligned(Value& value, Iter& iter) { *iter = value; } template - static void unaligned(Value const& value, Iter const& iter) + static void unaligned(Value& value, Iter& iter) { *iter = value; } diff --git a/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_type.hpp b/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_type.hpp index 78c4d07ec412..fda118a24750 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_type.hpp +++ 
b/libs/core/execution/include/hpx/execution/traits/detail/vc/vector_pack_type.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // Copyright (c) 2016 Matthias Kretz // // SPDX-License-Identifier: BSL-1.0 @@ -74,7 +74,7 @@ namespace hpx::parallel::traits { //////////////////////////////////////////////////////////////////// template struct vector_pack_mask_type::value>> + std::enable_if_t::value>> { using type = typename T::mask_type; }; diff --git a/libs/core/executors/include/hpx/executors/detail/index_queue_spawning.hpp b/libs/core/executors/include/hpx/executors/detail/index_queue_spawning.hpp index 927a1ca7b705..ecf3ad259627 100644 --- a/libs/core/executors/include/hpx/executors/detail/index_queue_spawning.hpp +++ b/libs/core/executors/include/hpx/executors/detail/index_queue_spawning.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2019-2020 ETH Zurich -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // Copyright (c) 2019 Agustin Berge // // SPDX-License-Identifier: BSL-1.0 @@ -53,7 +53,7 @@ namespace hpx::parallel::execution::detail { bool const allow_stealing; template - static constexpr void bulk_invoke_helper( + HPX_FORCEINLINE static constexpr void bulk_invoke_helper( hpx::util::index_pack, F&& f, T&& t, Ts&& ts) { HPX_INVOKE(HPX_FORWARD(F, f), HPX_FORWARD(T, t), @@ -63,7 +63,8 @@ namespace hpx::parallel::execution::detail { // Perform the work in one element indexed by index. The index // represents a range of indices (iterators) in the given shape. 
template - void do_work_chunk(F&& f, Ts&& ts, std::uint32_t const index) const + HPX_FORCEINLINE void do_work_chunk( + F&& f, Ts&& ts, std::uint32_t const index) const { #if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX) static hpx::util::itt::event notify_event( @@ -104,7 +105,7 @@ namespace hpx::parallel::execution::detail { if (allow_stealing) { // Then steal from the opposite end of the neighboring queues - static constexpr auto opposite_end = + constexpr auto opposite_end = hpx::concurrency::detail::opposite_end_v; for (std::uint32_t offset = 1; offset != state->num_threads; @@ -123,20 +124,6 @@ namespace hpx::parallel::execution::detail { } } - // Execute task function - void do_work() const - { - // schedule chunks from the end, if needed - if (reverse_placement) - { - do_work(); - } - else - { - do_work(); - } - } - // Store an exception and mark that an exception was thrown in the // operation state. This function assumes that there is a current // exception. @@ -150,7 +137,7 @@ namespace hpx::parallel::execution::detail { // Finish the work for one worker thread. If this is not the last worker // thread to finish, it will only decrement the counter. If it is the // last thread it will call set_exception if there is an exception. - // Otherwise it will call set_value on the shared state. + // Otherwise, it will call set_value on the shared state. 
void finish() const { if (--(state->tasks_remaining.data_) == 0) @@ -186,7 +173,16 @@ namespace hpx::parallel::execution::detail { { try { - do_work(); + // Execute task function + if (reverse_placement) + { + // schedule chunks from the end, if needed + do_work(); + } + else + { + do_work(); + } } catch (std::bad_alloc const&) { diff --git a/libs/core/iterator_support/include/hpx/iterator_support/counting_iterator.hpp b/libs/core/iterator_support/include/hpx/iterator_support/counting_iterator.hpp index 0840b62d031a..8dc232f17541 100644 --- a/libs/core/iterator_support/include/hpx/iterator_support/counting_iterator.hpp +++ b/libs/core/iterator_support/include/hpx/iterator_support/counting_iterator.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2023 Hartmut Kaiser +// Copyright (c) 2020-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -84,7 +84,7 @@ namespace hpx::util { using type = iterator_adaptor, - Incrementable, Incrementable, traversal, Incrementable const&, + Incrementable, Incrementable, traversal, Incrementable&, difference>; }; } // namespace detail @@ -122,6 +122,11 @@ namespace hpx::util { { return this->base_reference(); } + + HPX_HOST_DEVICE constexpr typename base_type::reference dereference() + { + return this->base_reference(); + } }; template (n); } - HPX_HOST_DEVICE constexpr typename base_type::reference dereference() - const noexcept + HPX_HOST_DEVICE constexpr decltype(auto) dereference() const noexcept + { + return this->base_reference(); + } + + HPX_HOST_DEVICE constexpr decltype(auto) dereference() noexcept { return this->base_reference(); } diff --git a/libs/core/iterator_support/include/hpx/iterator_support/iterator_adaptor.hpp b/libs/core/iterator_support/include/hpx/iterator_support/iterator_adaptor.hpp index 13f48604cae8..67c570246d4a 100644 --- a/libs/core/iterator_support/include/hpx/iterator_support/iterator_adaptor.hpp +++ 
b/libs/core/iterator_support/include/hpx/iterator_support/iterator_adaptor.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2023 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -106,7 +106,7 @@ namespace hpx::util { // // Reference - the reference type of the resulting iterator, and in // particular, the result type of operator*(). If not supplied but - // Value is supplied, Value& is used. Otherwise + // Value is supplied, Value& is used. Otherwise, // iterator_traits::reference is used. // // Difference - the difference_type of the resulting iterator. If not @@ -149,13 +149,13 @@ namespace hpx::util { Category, Reference, Difference, Pointer>; // lvalue access to the Base object for Derived - HPX_HOST_DEVICE HPX_FORCEINLINE constexpr Base const& base_reference() - const noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE Base& base_reference() noexcept { return iterator_; } - HPX_HOST_DEVICE HPX_FORCEINLINE Base& base_reference() noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr Base const& base_reference() + const noexcept { return iterator_; } diff --git a/libs/core/iterator_support/include/hpx/iterator_support/iterator_facade.hpp b/libs/core/iterator_support/include/hpx/iterator_support/iterator_facade.hpp index 2769e209c3a0..4121809ef484 100644 --- a/libs/core/iterator_support/include/hpx/iterator_support/iterator_facade.hpp +++ b/libs/core/iterator_support/include/hpx/iterator_support/iterator_facade.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2016 Thomas Heller -// Copyright (c) 2016-2023 Hartmut Kaiser +// Copyright (c) 2016-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -61,12 +61,23 @@ namespace hpx::util { it.decrement(); } + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Reference dereference( + Iterator& it) +#if !defined(HPX_MSVC) + // MSVC has issues with this + noexcept(noexcept(std::declval().dereference())) +#endif + { + return it.dereference(); + } + template HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Reference dereference( Iterator const& it) #if !defined(HPX_MSVC) // MSVC has issues with this - noexcept(noexcept(std::declval().dereference())) + noexcept(noexcept(std::declval().dereference())) #endif { return it.dereference(); @@ -105,12 +116,18 @@ namespace hpx::util { { } - HPX_HOST_DEVICE HPX_FORCEINLINE constexpr Reference* + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr decltype(auto) operator->() noexcept { return std::addressof(ref_); } + HPX_HOST_DEVICE HPX_FORCEINLINE constexpr decltype(auto) + operator->() const noexcept + { + return std::addressof(ref_); + } + Reference ref_; }; @@ -128,8 +145,20 @@ namespace hpx::util { { using type = T*; - HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr type call( - T& x) noexcept + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr decltype(auto) + call(T& x) noexcept + { + return std::addressof(x); + } + }; + + template + struct arrow_dispatch // "real" references + { + using type = T const*; + + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr decltype(auto) + call(T const& x) noexcept { return std::addressof(x); } @@ -165,22 +194,44 @@ namespace hpx::util { return *static_cast(this); } + using const_reference = + std::conditional_t, + std::add_lvalue_reference_t< + std::add_const_t>>, + std::add_const_t>; + public: - HPX_HOST_DEVICE constexpr decltype(auto) operator*() const - noexcept(noexcept(iterator_core_access::dereference( + HPX_HOST_DEVICE constexpr decltype(auto) operator*() noexcept( + noexcept(iterator_core_access::dereference( std::declval()))) { return iterator_core_access::dereference( this->derived()); } - HPX_HOST_DEVICE 
constexpr pointer operator->() const - noexcept(noexcept(iterator_core_access::dereference( + HPX_HOST_DEVICE constexpr decltype(auto) operator*() const noexcept( + noexcept(iterator_core_access::dereference( + std::declval()))) + { + return iterator_core_access::dereference( + this->derived()); + } + + HPX_HOST_DEVICE constexpr decltype(auto) operator->() noexcept( + noexcept(iterator_core_access::dereference( std::declval()))) { return arrow_dispatch::call(*this->derived()); } + HPX_HOST_DEVICE constexpr decltype(auto) operator->() const + noexcept( + noexcept(iterator_core_access::dereference( + std::declval()))) + { + return arrow_dispatch::call(*this->derived()); + } + HPX_HOST_DEVICE Derived& operator++() noexcept(noexcept( iterator_core_access::increment(std::declval()))) { diff --git a/libs/core/iterator_support/include/hpx/iterator_support/traits/is_iterator.hpp b/libs/core/iterator_support/include/hpx/iterator_support/traits/is_iterator.hpp index ab579147a272..a84513af56ff 100644 --- a/libs/core/iterator_support/include/hpx/iterator_support/traits/is_iterator.hpp +++ b/libs/core/iterator_support/include/hpx/iterator_support/traits/is_iterator.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // Copyright (c) 2019 Austin McCartney // // SPDX-License-Identifier: BSL-1.0 @@ -14,7 +14,6 @@ #include #include #include -#include namespace hpx::traits { diff --git a/libs/core/runtime_configuration/src/runtime_configuration.cpp b/libs/core/runtime_configuration/src/runtime_configuration.cpp index 205b5d85647d..0c30a6120663 100644 --- a/libs/core/runtime_configuration/src/runtime_configuration.cpp +++ b/libs/core/runtime_configuration/src/runtime_configuration.cpp @@ -150,7 +150,7 @@ namespace hpx::util { "${HPX_EXPECT_CONNECTING_LOCALITIES:0}", // add placeholders for keys to be added by command line handling - "os_threads = cores", + "os_threads = ${HPX_NUM_WORKER_THREADS:cores}", "cores = all", "localities 
= 1", "first_pu = 0", diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_num_tss.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_num_tss.hpp index f3a3ac9714f7..d54721bf9060 100644 --- a/libs/core/threading_base/include/hpx/threading_base/thread_num_tss.hpp +++ b/libs/core/threading_base/include/hpx/threading_base/thread_num_tss.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -19,22 +19,25 @@ namespace hpx::threads::detail { /// Set the global thread id to thread local storage. - HPX_CORE_EXPORT std::size_t set_global_thread_num_tss(std::size_t num); + HPX_CORE_EXPORT std::size_t set_global_thread_num_tss( + std::size_t num) noexcept; /// Get the global thread id from thread local storage. - HPX_CORE_EXPORT std::size_t get_global_thread_num_tss(); + HPX_CORE_EXPORT std::size_t get_global_thread_num_tss() noexcept; /// Set the local thread id to thread local storage. - HPX_CORE_EXPORT std::size_t set_local_thread_num_tss(std::size_t num); + HPX_CORE_EXPORT std::size_t set_local_thread_num_tss( + std::size_t num) noexcept; /// Get the local thread id from thread local storage. - HPX_CORE_EXPORT std::size_t get_local_thread_num_tss(); + HPX_CORE_EXPORT std::size_t get_local_thread_num_tss() noexcept; /// Set the thread pool id to thread local storage. - HPX_CORE_EXPORT std::size_t set_thread_pool_num_tss(std::size_t num); + HPX_CORE_EXPORT std::size_t set_thread_pool_num_tss( + std::size_t num) noexcept; /// Get the thread pool id from thread local storage. - HPX_CORE_EXPORT std::size_t get_thread_pool_num_tss(); + HPX_CORE_EXPORT std::size_t get_thread_pool_num_tss() noexcept; /// Holds the global and local thread numbers, and the pool number /// associated with the thread. 
@@ -45,13 +48,13 @@ namespace hpx::threads::detail { std::size_t thread_pool_num; }; - HPX_CORE_EXPORT void set_thread_nums_tss(thread_nums const&); - HPX_CORE_EXPORT thread_nums get_thread_nums_tss(); + HPX_CORE_EXPORT void set_thread_nums_tss(thread_nums const&) noexcept; + HPX_CORE_EXPORT thread_nums get_thread_nums_tss() noexcept; /////////////////////////////////////////////////////////////////////////// struct reset_tss_helper { - explicit reset_tss_helper(std::size_t global_thread_num) + explicit reset_tss_helper(std::size_t global_thread_num) noexcept : global_thread_num_(set_global_thread_num_tss(global_thread_num)) { } @@ -92,7 +95,7 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will /// fail otherwise (it will return -1). - HPX_CORE_EXPORT std::size_t get_worker_thread_num(); + HPX_CORE_EXPORT std::size_t get_worker_thread_num() noexcept; /////////////////////////////////////////////////////////////////////////// /// \brief Return the number of the current OS-thread running in the runtime @@ -101,7 +104,8 @@ namespace hpx { /// This function returns the zero based index of the OS-thread which /// executes the current HPX-thread. /// - /// \param ec [in,out] this represents the error status on exit. + /// \param ec [in,out] this represents the error status on exit (obsolete, + /// ignored). /// /// \note The returned value is zero based and its maximum value is /// smaller than the overall number of OS-threads executed (as @@ -111,7 +115,7 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will /// fail otherwise (it will return -1). 
- HPX_CORE_EXPORT std::size_t get_worker_thread_num(error_code& ec); + HPX_CORE_EXPORT std::size_t get_worker_thread_num(error_code&) noexcept; /////////////////////////////////////////////////////////////////////////// /// \brief Return the number of the current OS-thread running in the current @@ -127,7 +131,7 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will fail /// otherwise (it will return -1). - HPX_CORE_EXPORT std::size_t get_local_worker_thread_num(); + HPX_CORE_EXPORT std::size_t get_local_worker_thread_num() noexcept; /////////////////////////////////////////////////////////////////////////// /// \brief Return the number of the current OS-thread running in the current @@ -136,7 +140,8 @@ namespace hpx { /// This function returns the zero based index of the OS-thread on the /// current thread pool which executes the current HPX-thread. /// - /// \param ec [in,out] this represents the error status on exit. + /// \param ec [in,out] this represents the error status on exit (obsolete, + /// ignored). /// /// \note The returned value is zero based and its maximum value is smaller /// than the number of OS-threads executed on the current thread pool. @@ -145,7 +150,8 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will fail /// otherwise (it will return -1). - HPX_CORE_EXPORT std::size_t get_local_worker_thread_num(error_code& ec); + HPX_CORE_EXPORT std::size_t get_local_worker_thread_num( + error_code&) noexcept; /////////////////////////////////////////////////////////////////////////// /// \brief Return the number of the current thread pool the current @@ -161,7 +167,7 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will fail /// otherwise (it will return -1). 
- HPX_CORE_EXPORT std::size_t get_thread_pool_num(); + HPX_CORE_EXPORT std::size_t get_thread_pool_num() noexcept; /////////////////////////////////////////////////////////////////////////// /// \brief Return the number of the current thread pool the current @@ -170,7 +176,8 @@ namespace hpx { /// This function returns the zero based index of the thread pool which /// executes the current HPX-thread. /// - /// \param ec [in,out] this represents the error status on exit. + /// \param ec [in,out] this represents the error status on exit (obsolete, + /// ignored). /// /// \note The returned value is zero based and its maximum value is smaller /// than the number of thread pools started by the runtime. It will @@ -179,7 +186,7 @@ namespace hpx { /// /// \note This function needs to be executed on a HPX-thread. It will fail /// otherwise (it will return -1). - HPX_CORE_EXPORT std::size_t get_thread_pool_num(error_code& ec); + HPX_CORE_EXPORT std::size_t get_thread_pool_num(error_code&) noexcept; } // namespace hpx #include diff --git a/libs/core/threading_base/src/thread_num_tss.cpp b/libs/core/threading_base/src/thread_num_tss.cpp index d6687271ab32..3ce83e94edca 100644 --- a/libs/core/threading_base/src/thread_num_tss.cpp +++ b/libs/core/threading_base/src/thread_num_tss.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2024 Hartmut Kaiser +// Copyright (c) 2007-2025 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -15,88 +15,103 @@ namespace hpx::threads::detail { namespace { - thread_nums& thread_nums_tss() + HPX_FORCEINLINE std::size_t& global_thread_num() noexcept { - thread_local thread_nums thread_nums_tss_ = { - static_cast<std::size_t>(-1), static_cast<std::size_t>(-1), - static_cast<std::size_t>(-1)}; - return thread_nums_tss_; + thread_local std::size_t global_thread_num_ = + static_cast<std::size_t>(-1); + return global_thread_num_; + } + + HPX_FORCEINLINE std::size_t& local_thread_num() noexcept + { + thread_local std::size_t local_thread_num_ = + static_cast<std::size_t>(-1); + return local_thread_num_; + } + + HPX_FORCEINLINE std::size_t& thread_pool_num() noexcept + { + thread_local std::size_t thread_pool_num_ = + static_cast<std::size_t>(-1); + return thread_pool_num_; } } // namespace - std::size_t set_global_thread_num_tss(std::size_t num) + std::size_t set_global_thread_num_tss(std::size_t num) noexcept { - std::swap(thread_nums_tss().global_thread_num, num); + std::swap(global_thread_num(), num); return num; } - std::size_t get_global_thread_num_tss() + std::size_t get_global_thread_num_tss() noexcept { - return thread_nums_tss().global_thread_num; + return global_thread_num(); } - std::size_t set_local_thread_num_tss(std::size_t num) + std::size_t set_local_thread_num_tss(std::size_t num) noexcept { - std::swap(thread_nums_tss().local_thread_num, num); + std::swap(local_thread_num(), num); return num; } - std::size_t get_local_thread_num_tss() + std::size_t get_local_thread_num_tss() noexcept { - return thread_nums_tss().local_thread_num; + return local_thread_num(); } - std::size_t set_thread_pool_num_tss(std::size_t num) + std::size_t set_thread_pool_num_tss(std::size_t num) noexcept { - std::swap(thread_nums_tss().thread_pool_num, num); + std::swap(thread_pool_num(), num); return num; } - std::size_t get_thread_pool_num_tss() + std::size_t get_thread_pool_num_tss() noexcept { - return thread_nums_tss().thread_pool_num; + return thread_pool_num(); } - void
set_thread_nums_tss(thread_nums const& t) noexcept { - thread_nums_tss() = t; + global_thread_num() = t.global_thread_num; + local_thread_num() = t.local_thread_num; + thread_pool_num() = t.thread_pool_num; } - thread_nums get_thread_nums_tss() + thread_nums get_thread_nums_tss() noexcept { - return thread_nums_tss(); + return {global_thread_num(), local_thread_num(), thread_pool_num()}; } } // namespace hpx::threads::detail namespace hpx { - std::size_t get_worker_thread_num(error_code& /* ec */) + std::size_t get_worker_thread_num(error_code& /* ec */) noexcept { - return threads::detail::thread_nums_tss().global_thread_num; + return threads::detail::global_thread_num(); } - std::size_t get_worker_thread_num() + std::size_t get_worker_thread_num() noexcept { - return get_worker_thread_num(throws); + return threads::detail::global_thread_num(); } - std::size_t get_local_worker_thread_num(error_code& /* ec */) + std::size_t get_local_worker_thread_num(error_code& /* ec */) noexcept { - return threads::detail::thread_nums_tss().local_thread_num; + return threads::detail::local_thread_num(); } - std::size_t get_local_worker_thread_num() + std::size_t get_local_worker_thread_num() noexcept { - return get_local_worker_thread_num(throws); + return threads::detail::local_thread_num(); } - std::size_t get_thread_pool_num(error_code& /* ec */) + std::size_t get_thread_pool_num(error_code& /* ec */) noexcept { - return threads::detail::thread_nums_tss().thread_pool_num; + return threads::detail::thread_pool_num(); } - std::size_t get_thread_pool_num() + std::size_t get_thread_pool_num() noexcept { - return get_thread_pool_num(throws); + return threads::detail::thread_pool_num(); } } // namespace hpx