From 2a3ce35e4b61cefd2278f1a40d06742d98819a3e Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Wed, 26 May 2021 12:37:48 +0530 Subject: [PATCH 01/58] Add CUDA timestamp support to hardware clock --- .../include/hpx/hardware/timestamp/linux_generic.hpp | 9 +++++++++ .../include/hpx/hardware/timestamp/linux_x86_32.hpp | 9 +++++++++ .../include/hpx/hardware/timestamp/linux_x86_64.hpp | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index 05fde245b680..07596a1e6b46 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -14,11 +14,20 @@ namespace hpx { namespace util { namespace hardware { +#if defined(HPX_HAVE_CUDA) + HPX_HOST_DEVICE +#endif inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + std::uint64_t cur; + asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); + return cur; +#else struct timespec res; clock_gettime(CLOCK_MONOTONIC, &res); return 1000 * res.tv_sec + res.tv_nsec / 1000000; +#endif } }}} // namespace hpx::util::hardware diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index d3167fabd66f..f96276d90c00 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -13,8 +13,16 @@ namespace hpx { namespace util { namespace hardware { // clang-format off +#if defined(HPX_HAVE_CUDA) + HPX_HOST_DEVICE +#endif inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + std::uint64_t cur; + asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); + return cur; +#else std::uint64_t r = 0; #if defined(HPX_HAVE_RDTSCP) @@ -36,6 +44,7 @@ namespace hpx { 
namespace util { namespace hardware { #endif return r; +#endif } // clang-format on diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index 75c745285f6d..2ab75caee783 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -12,9 +12,17 @@ namespace hpx { namespace util { namespace hardware { +#if defined(HPX_HAVE_CUDA) + HPX_HOST_DEVICE +#endif // clang-format off inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + std::uint64_t cur; + asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); + return cur; +#else std::uint32_t lo = 0, hi = 0; #if defined(HPX_HAVE_RDTSCP) __asm__ __volatile__( @@ -31,6 +39,7 @@ namespace hpx { namespace util { namespace hardware { : "rbx", "rcx"); #endif return ((static_cast(hi)) << 32) | lo; +#endif } // clang-format on From 793676602be45a3d78a7c93179dfbd5c24e056a3 Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Wed, 26 May 2021 13:42:47 +0530 Subject: [PATCH 02/58] Get rid of #if around HPX_HOST_DEVICE --- .../include/hpx/hardware/timestamp/linux_generic.hpp | 7 +++---- .../include/hpx/hardware/timestamp/linux_x86_32.hpp | 6 ++---- .../include/hpx/hardware/timestamp/linux_x86_64.hpp | 6 ++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index 07596a1e6b46..3a28db75c21c 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -12,12 +12,11 @@ #include +#include + namespace hpx { namespace util { namespace hardware { -#if defined(HPX_HAVE_CUDA) - HPX_HOST_DEVICE -#endif - inline std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t 
timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) std::uint64_t cur; diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index f96276d90c00..07ca038b5219 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -9,14 +9,12 @@ #pragma once #include +#include namespace hpx { namespace util { namespace hardware { // clang-format off -#if defined(HPX_HAVE_CUDA) - HPX_HOST_DEVICE -#endif - inline std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) std::uint64_t cur; diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index 2ab75caee783..c97e2e6e3bca 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -9,14 +9,12 @@ #pragma once #include +#include namespace hpx { namespace util { namespace hardware { -#if defined(HPX_HAVE_CUDA) - HPX_HOST_DEVICE -#endif // clang-format off - inline std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) std::uint64_t cur; From 572317aa7e5086f8b455ccf6a4de43f173d7aba6 Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Thu, 27 May 2021 19:22:33 +0530 Subject: [PATCH 03/58] Add separate implementation for cuda timestamp --- .../include/hpx/hardware/timestamp.hpp | 6 ++++- .../include/hpx/hardware/timestamp/cuda.hpp | 24 +++++++++++++++++++ .../hpx/hardware/timestamp/linux_generic.hpp | 8 +------ .../hpx/hardware/timestamp/linux_x86_32.hpp | 8 +------ .../hpx/hardware/timestamp/linux_x86_64.hpp | 8 +------ 5 files changed, 32 insertions(+), 22 deletions(-) create mode 100644 
libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp diff --git a/libs/core/hardware/include/hpx/hardware/timestamp.hpp b/libs/core/hardware/include/hpx/hardware/timestamp.hpp index bb2864cd124c..79cd26bbc3bb 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp.hpp @@ -11,7 +11,11 @@ #include // clang-format off -#if defined(HPX_MSVC) +// nvcc complains about multiple definition error for same function prototype +// irrespective of __host__, __device__, and __host__ __device__ attributes. +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + #include +#elif defined(HPX_MSVC) #include #elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \ defined(__x86_64) || defined(_M_X64) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp new file mode 100644 index 000000000000..b4c60e6b7c6d --- /dev/null +++ b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp @@ -0,0 +1,24 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2012 Thomas Heller +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include + +#include + +namespace hpx { namespace util { namespace hardware { + + HPX_HOST_DEVICE std::uint64_t timestamp() + { + std::uint64_t cur; + asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); + return cur; + } + +}}} // namespace hpx::util::hardware \ No newline at end of file diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index 3a28db75c21c..f3fcbd8d057f 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -16,17 +16,11 @@ namespace hpx { namespace util { namespace hardware { - HPX_HOST_DEVICE inline std::uint64_t timestamp() + inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) - std::uint64_t cur; - asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); - return cur; -#else struct timespec res; clock_gettime(CLOCK_MONOTONIC, &res); return 1000 * res.tv_sec + res.tv_nsec / 1000000; -#endif } }}} // namespace hpx::util::hardware diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index 07ca038b5219..d43a576dec8b 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -14,13 +14,8 @@ namespace hpx { namespace util { namespace hardware { // clang-format off - HPX_HOST_DEVICE inline std::uint64_t timestamp() + inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) - std::uint64_t cur; - asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); - return cur; -#else std::uint64_t r = 0; #if 
defined(HPX_HAVE_RDTSCP) @@ -42,7 +37,6 @@ namespace hpx { namespace util { namespace hardware { #endif return r; -#endif } // clang-format on diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index c97e2e6e3bca..3ae9f86a9be8 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -14,13 +14,8 @@ namespace hpx { namespace util { namespace hardware { // clang-format off - HPX_HOST_DEVICE inline std::uint64_t timestamp() + inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) - std::uint64_t cur; - asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); - return cur; -#else std::uint32_t lo = 0, hi = 0; #if defined(HPX_HAVE_RDTSCP) __asm__ __volatile__( @@ -37,7 +32,6 @@ namespace hpx { namespace util { namespace hardware { : "rbx", "rcx"); #endif return ((static_cast(hi)) << 32) | lo; -#endif } // clang-format on From 034058848f135089baa9a0cfc9937cf72e160774 Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Sat, 5 Jun 2021 15:44:03 +0530 Subject: [PATCH 04/58] Finalize according to Mikael --- libs/core/hardware/CMakeLists.txt | 3 +++ libs/core/hardware/include/hpx/hardware/timestamp.hpp | 7 +------ .../hardware/include/hpx/hardware/timestamp/cuda.hpp | 6 ++---- .../include/hpx/hardware/timestamp/linux_generic.hpp | 10 +++++++++- .../include/hpx/hardware/timestamp/linux_x86_32.hpp | 8 ++++++++ .../include/hpx/hardware/timestamp/linux_x86_64.hpp | 8 ++++++++ 6 files changed, 31 insertions(+), 11 deletions(-) diff --git a/libs/core/hardware/CMakeLists.txt b/libs/core/hardware/CMakeLists.txt index ba5c76aab6d5..ae7e72fd9960 100644 --- a/libs/core/hardware/CMakeLists.txt +++ b/libs/core/hardware/CMakeLists.txt @@ -11,6 +11,7 @@ set(hardware_headers hpx/hardware/cpuid/linux_x86.hpp hpx/hardware/cpuid/msvc.hpp hpx/hardware/timestamp/bgq.hpp + 
hpx/hardware/timestamp/cuda.hpp hpx/hardware/timestamp/linux_generic.hpp hpx/hardware/timestamp/linux_x86_32.hpp hpx/hardware/timestamp/linux_x86_64.hpp @@ -26,6 +27,7 @@ set(hardware_compat_headers hpx/util/hardware/cpuid/linux_x86.hpp => hpx/modules/hardware.hpp hpx/util/hardware/cpuid/msvc.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/bgq.hpp => hpx/modules/hardware.hpp + hpx/util/hardware/timestamp/cuda.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_generic.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_x86_32.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_x86_64.hpp => hpx/modules/hardware.hpp @@ -47,6 +49,7 @@ add_hpx_module( "hpx/hardware/cpuid/linux_x86.hpp" "hpx/hardware/cpuid/msvc.hpp" "hpx/hardware/timestamp/bgq.hpp" + "hpx/hardware/timestamp/cuda.hpp" "hpx/hardware/timestamp/linux_generic.hpp" "hpx/hardware/timestamp/linux_x86_32.hpp" "hpx/hardware/timestamp/linux_x86_64.hpp" diff --git a/libs/core/hardware/include/hpx/hardware/timestamp.hpp b/libs/core/hardware/include/hpx/hardware/timestamp.hpp index 79cd26bbc3bb..c2a36546ff1f 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp.hpp @@ -10,12 +10,7 @@ #include -// clang-format off -// nvcc complains about multiple definition error for same function prototype -// irrespective of __host__, __device__, and __host__ __device__ attributes. 
-#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) - #include -#elif defined(HPX_MSVC) +#if defined(HPX_MSVC) #include #elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \ defined(__x86_64) || defined(_M_X64) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp index b4c60e6b7c6d..7c39b01de42b 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2012 Thomas Heller +// Copyright (c) 2021 Nikunj Gupta // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -8,13 +8,11 @@ #pragma once -#include - #include namespace hpx { namespace util { namespace hardware { - HPX_HOST_DEVICE std::uint64_t timestamp() + HPX_DEVICE std::uint64_t timestamp_cuda() { std::uint64_t cur; asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index f3fcbd8d057f..6233aec28741 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -14,13 +14,21 @@ #include +#if define(HPX_HAVE_CUDA) +#include +#endif + namespace hpx { namespace util { namespace hardware { - inline std::uint64_t timestamp() + HPX_HOST_DEVICE std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + return timestamp_cuda(); +#else struct timespec res; clock_gettime(CLOCK_MONOTONIC, &res); return 1000 * res.tv_sec + res.tv_nsec / 1000000; +#endif } }}} // namespace hpx::util::hardware diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp 
b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index d43a576dec8b..b847bf6c6e31 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -11,11 +11,18 @@ #include #include +#if define(HPX_HAVE_CUDA) +#include +#endif + namespace hpx { namespace util { namespace hardware { // clang-format off inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + return timestamp_cuda(); +#else std::uint64_t r = 0; #if defined(HPX_HAVE_RDTSCP) @@ -37,6 +44,7 @@ namespace hpx { namespace util { namespace hardware { #endif return r; +#endif } // clang-format on diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index 3ae9f86a9be8..bf050582afc0 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -11,11 +11,18 @@ #include #include +#if define(HPX_HAVE_CUDA) +#include +#endif + namespace hpx { namespace util { namespace hardware { // clang-format off inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + return timestamp_cuda(); +#else std::uint32_t lo = 0, hi = 0; #if defined(HPX_HAVE_RDTSCP) __asm__ __volatile__( @@ -32,6 +39,7 @@ namespace hpx { namespace util { namespace hardware { : "rbx", "rcx"); #endif return ((static_cast(hi)) << 32) | lo; +#endif } // clang-format on From 1156fd4f32b6131f2bb1b5f8a55747ef55cf393a Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Mon, 7 Jun 2021 18:57:21 +0530 Subject: [PATCH 05/58] add timestamp function to msvc and bgq --- .../hardware/include/hpx/hardware/timestamp/bgq.hpp | 11 ++++++++++- .../include/hpx/hardware/timestamp/linux_generic.hpp | 2 +- .../include/hpx/hardware/timestamp/linux_x86_32.hpp | 4 ++-- 
.../include/hpx/hardware/timestamp/linux_x86_64.hpp | 2 +- .../hardware/include/hpx/hardware/timestamp/msvc.hpp | 8 ++++++++ 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp index b34ef95345c2..971122b78fab 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp @@ -17,11 +17,20 @@ #include +#include +#if defined(HPX_HAVE_CUDA) +#include +#endif + namespace hpx { namespace util { namespace hardware { - inline std::uint64_t timestamp() + HPX_HOST_DEVICE std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + return timestamp_cuda(); +#else return GetTimeBase(); +#endif } }}} // namespace hpx::util::hardware diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index 6233aec28741..9ef72cb51512 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -14,7 +14,7 @@ #include -#if define(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) #include #endif diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index b847bf6c6e31..194a48125d4d 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -11,14 +11,14 @@ #include #include -#if define(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) #include #endif namespace hpx { namespace util { namespace hardware { // clang-format off - inline std::uint64_t timestamp() + HPX_HOST_DEVICE std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); diff --git 
a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index bf050582afc0..a3e99ad94fe1 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -11,7 +11,7 @@ #include #include -#if define(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) #include #endif diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp index 810782c6bb06..2e91ec1c8187 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp @@ -16,12 +16,20 @@ #include #include +#if defined(HPX_HAVE_CUDA) +#include +#endif + namespace hpx { namespace util { namespace hardware { inline std::uint64_t timestamp() { +#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) + return timestamp_cuda(); +#else LARGE_INTEGER now; QueryPerformanceCounter(&now); return static_cast(now.QuadPart); +#endif } }}} // namespace hpx::util::hardware From 6ce59367f95c3d5dddaac510e6ad3e5a3cb108e0 Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Wed, 23 Jun 2021 16:59:22 -0500 Subject: [PATCH 06/58] Add support for CPU/GPU source detection --- libs/core/hardware/CMakeLists.txt | 1 - libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp | 5 +++-- libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp | 5 +++-- .../include/hpx/hardware/timestamp/linux_generic.hpp | 4 ++-- .../hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp | 4 ++-- .../hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp | 4 ++-- libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/libs/core/hardware/CMakeLists.txt b/libs/core/hardware/CMakeLists.txt index ae7e72fd9960..68d25d95272a 100644 --- a/libs/core/hardware/CMakeLists.txt +++ 
b/libs/core/hardware/CMakeLists.txt @@ -27,7 +27,6 @@ set(hardware_compat_headers hpx/util/hardware/cpuid/linux_x86.hpp => hpx/modules/hardware.hpp hpx/util/hardware/cpuid/msvc.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/bgq.hpp => hpx/modules/hardware.hpp - hpx/util/hardware/timestamp/cuda.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_generic.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_x86_32.hpp => hpx/modules/hardware.hpp hpx/util/hardware/timestamp/linux_x86_64.hpp => hpx/modules/hardware.hpp diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp index 971122b78fab..5df1d79e9f73 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp @@ -18,13 +18,14 @@ #include #include -#if defined(HPX_HAVE_CUDA) + +#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) #include #endif namespace hpx { namespace util { namespace hardware { - HPX_HOST_DEVICE std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp index 7c39b01de42b..f187a73dfaca 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/cuda.hpp @@ -9,14 +9,15 @@ #pragma once #include +#include namespace hpx { namespace util { namespace hardware { - HPX_DEVICE std::uint64_t timestamp_cuda() + HPX_DEVICE inline std::uint64_t timestamp_cuda() { std::uint64_t cur; asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(cur)); return cur; } -}}} // namespace hpx::util::hardware \ No newline at end of file +}}} // namespace hpx::util::hardware diff --git 
a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index 9ef72cb51512..bc00bd2c3dfe 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -14,13 +14,13 @@ #include -#if defined(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) #include #endif namespace hpx { namespace util { namespace hardware { - HPX_HOST_DEVICE std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index 194a48125d4d..0989c5fa4d94 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -11,14 +11,14 @@ #include #include -#if defined(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) #include #endif namespace hpx { namespace util { namespace hardware { // clang-format off - HPX_HOST_DEVICE std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index a3e99ad94fe1..513b85de422b 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -11,14 +11,14 @@ #include #include -#if defined(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) #include #endif namespace hpx { namespace util { namespace hardware { // clang-format off - inline std::uint64_t timestamp() + HPX_HOST_DEVICE inline 
std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp index 2e91ec1c8187..17c96e7d61e4 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp @@ -16,12 +16,12 @@ #include #include -#if defined(HPX_HAVE_CUDA) +#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) #include #endif namespace hpx { namespace util { namespace hardware { - inline std::uint64_t timestamp() + HPX_HOST_DEVICE inline std::uint64_t timestamp() { #if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) return timestamp_cuda(); From 47b8552e022684b638ccf238cce9026e9c2ec099 Mon Sep 17 00:00:00 2001 From: targetakhil Date: Wed, 9 Jun 2021 22:52:30 +0530 Subject: [PATCH 07/58] updated base implementations to return an in_out_result --- .../algorithms/uninitialized_move.hpp | 474 +++++++++++++++--- .../include/hpx/parallel/container_memory.hpp | 1 + .../hpx/parallel/util/result_types.hpp | 15 + .../algorithms/uninitialized_move_tests.hpp | 12 +- .../unit/algorithms/uninitialized_moven.cpp | 12 +- 5 files changed, 441 insertions(+), 73 deletions(-) diff --git a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp index 986ab261de7f..35f83cdb365f 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp @@ -8,8 +8,208 @@ #pragma once +#if defined(DOXYGEN) +namespace hpx { + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest. If an exception is + /// thrown during the copy operation, the function has no effects. 
+ /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam InIter The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam FwdIter The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// + /// \param first Refers to the beginning of the sequence of elements + /// the algorithm will be applied to. + /// \param last Refers to the end of the sequence of elements the + /// algorithm will be applied to. + /// \param dest Refers to the beginning of the destination range. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// without an execution policy object will execute in sequential order in + /// the calling thread. + /// + /// \returns The \a uninitialized_copy algorithm returns \a FwdIter. + /// The \a uninitialized_copy algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + FwdIter uninitialized_copy(InIter first, InIter last, FwdIter dest); + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest. If an exception is + /// thrown during the copy operation, the function has no effects. + /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced). + /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam FwdIter1 The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// \tparam FwdIter2 The type of the iterator representing the + /// destination range (deduced).
+ /// This iterator type must meet the requirements of a + /// forward iterator. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param first Refers to the beginning of the sequence of elements + /// the algorithm will be applied to. + /// \param last Refers to the end of the sequence of elements the + /// algorithm will be applied to. + /// \param dest Refers to the beginning of the destination range. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a sequenced_policy + /// execute in sequential order in the calling thread. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy algorithm returns a + /// \a hpx::future, if the execution policy is of type + /// \a sequenced_task_policy or + /// \a parallel_task_policy and + /// returns \a FwdIter2 otherwise. + /// The \a uninitialized_copy algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + typename parallel::util::detail::algorithm_result::type + uninitialized_copy( + ExPolicy&& policy, FwdIter1 first, FwdIter1 last, FwdIter2 dest); + + /// Copies the elements in the range [first, first + count), starting from + /// first and proceeding to first + count - 1, to another range beginning + /// at dest. If an exception is thrown during the copy operation, the + /// function has no effects. + /// + /// \note Complexity: Performs exactly \a count assignments, if + /// count > 0, no assignments otherwise. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced).
+ /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam FwdIter1 The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Size The type of the argument specifying the number of + /// elements to copy. + /// \tparam FwdIter2 The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param first Refers to the beginning of the sequence of elements + /// the algorithm will be applied to. + /// \param count Refers to the number of elements starting at + /// \a first the algorithm will be applied to. + /// \param dest Refers to the beginning of the destination range. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a sequenced_policy execute in sequential order in the + /// calling thread. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy_n algorithm returns a + /// \a hpx::future if the execution policy is of type + /// \a sequenced_task_policy or + /// \a parallel_task_policy and + /// returns \a FwdIter2 otherwise. + /// The \a uninitialized_copy_n algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied.
+ /// + template + FwdIter uninitialized_copy_n(InIter first, Size count, FwdIter dest); + + /// Copies the elements in the range [first, first + count), starting from + /// first and proceeding to first + count - 1., to another range beginning + /// at dest. If an exception is thrown during the copy operation, the + /// function has no effects. + /// + /// \note Complexity: Performs exactly \a count assignments, if + /// count > 0, no assignments otherwise. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced). + /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam FwdIter1 The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Size The type of the argument specifying the number of + /// elements to apply \a f to. + /// \tparam FwdIter2 The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param first Refers to the beginning of the sequence of elements + /// the algorithm will be applied to. + /// \param count Refers to the number of elements starting at + /// \a first the algorithm will be applied to. + /// \param dest Refers to the beginning of the destination range. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a sequenced_policy execute in sequential order in the + /// calling thread. 
+ /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy_n algorithm returns a + /// \a hpx::future if the execution policy is of type + /// \a sequenced_task_policy or + /// \a parallel_task_policy and + /// returns \a FwdIter2 otherwise. + /// The \a uninitialized_copy_n algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + typename parallel::util::detail::algorithm_result::type + uninitialized_copy_n( + ExPolicy&& policy, FwdIter1 first, Size count, FwdIter2 dest); +} // namespace hpx + +#else // DOXYGEN + #include #include +#include #include #include @@ -20,6 +220,7 @@ #include #include #include +#include #include #include @@ -38,12 +239,12 @@ namespace hpx { namespace parallel { inline namespace v1 { // provide our own implementation of std::uninitialized_move as some // versions of MSVC horribly fail at compiling it for some types T - template - InIter2 std_uninitialized_move( - InIter1 first, InIter1 last, InIter2 d_first) + template + util::in_out_result std_uninitialized_move( + InIter1 first, Sent last, InIter2 d_first) { - typedef - typename std::iterator_traits::value_type value_type; + using value_type = + typename std::iterator_traits::value_type; InIter2 current = d_first; try @@ -53,7 +254,7 @@ namespace hpx { namespace parallel { inline namespace v1 { ::new (std::addressof(*current)) value_type(std::move(*first)); } - return current; + return util::in_out_result{first, current}; } catch (...) 
{ @@ -67,33 +268,36 @@ namespace hpx { namespace parallel { inline namespace v1 { /////////////////////////////////////////////////////////////////////// template - InIter2 sequential_uninitialized_move_n(InIter1 first, - std::size_t count, InIter2 dest, + util::in_out_result sequential_uninitialized_move_n( + InIter1 first, std::size_t count, InIter2 dest, util::cancellation_token& tok) { - typedef - typename std::iterator_traits::value_type value_type; - - return util::loop_with_cleanup_n_with_token( - first, count, dest, tok, - [](InIter1 it, InIter2 dest) -> void { - ::new (std::addressof(*dest)) value_type(std::move(*it)); - }, - [](InIter2 dest) -> void { (*dest).~value_type(); }); + using value_type = + typename std::iterator_traits::value_type; + + return util::in_out_result{ + std::next(first, count), + util::loop_with_cleanup_n_with_token( + first, count, dest, tok, + [](InIter1 it, InIter2 dest) -> void { + ::new (std::addressof(*dest)) + value_type(std::move(*it)); + }, + [](InIter2 dest) -> void { (*dest).~value_type(); })}; } /////////////////////////////////////////////////////////////////////// template typename util::detail::algorithm_result>::type + util::in_out_result>::type parallel_sequential_uninitialized_move_n( ExPolicy&& policy, Iter first, std::size_t count, FwdIter2 dest) { if (count == 0) { return util::detail::algorithm_result>::get(std::make_pair(first, - dest)); + util::in_out_result>:: + get(util::in_out_result{first, dest}); } typedef hpx::util::zip_iterator zip_iterator; @@ -103,7 +307,7 @@ namespace hpx { namespace parallel { inline namespace v1 { util::cancellation_token tok; return util::partitioner_with_cleanup, partition_result_type>:: + util::in_out_result, partition_result_type>:: call( std::forward(policy), hpx::util::make_zip_iterator(first, dest), count, @@ -113,16 +317,17 @@ namespace hpx { namespace parallel { inline namespace v1 { auto iters = t.get_iterator_tuple(); FwdIter2 dest = get<1>(iters); return 
std::make_pair(dest, - sequential_uninitialized_move_n( - get<0>(iters), part_size, dest, tok)); + util::get_second_element( + sequential_uninitialized_move_n( + get<0>(iters), part_size, dest, tok))); }, // finalize, called once if no error occurred [first, dest, count](std::vector< hpx::future>&&) mutable - -> std::pair { + -> util::in_out_result { std::advance(first, count); std::advance(dest, count); - return std::make_pair(first, dest); + return util::in_out_result{first, dest}; }, // cleanup function, called for each partition which // didn't fail, but only if at least one failed @@ -136,34 +341,32 @@ namespace hpx { namespace parallel { inline namespace v1 { } /////////////////////////////////////////////////////////////////////// - template + template struct uninitialized_move - : public detail::algorithm, FwdIter2> + : public detail::algorithm, IterPair> { uninitialized_move() : uninitialized_move::algorithm("uninitialized_move") { } - template - static FwdIter2 sequential( - ExPolicy, InIter1 first, InIter1 last, FwdIter2 dest) + template + static util::in_out_result sequential( + ExPolicy, InIter1 first, Sent last, FwdIter2 dest) { return std_uninitialized_move(first, last, dest); } - template + template static typename util::detail::algorithm_result::type - parallel(ExPolicy&& policy, Iter first, Iter last, FwdIter2 dest) + util::in_out_result>::type + parallel(ExPolicy&& policy, Iter first, Sent last, FwdIter2 dest) { - return util::detail::convert_to_result( - parallel_sequential_uninitialized_move_n( - std::forward(policy), first, - std::distance(first, last), dest), - [](std::pair const& p) -> FwdIter2 { - return p.second; - }); + return parallel_sequential_uninitialized_move_n( + std::forward(policy), first, + detail::distance(first, last), dest); } }; /// \endcond @@ -219,17 +422,30 @@ namespace hpx { namespace parallel { inline namespace v1 { HPX_CONCEPT_REQUIRES_(hpx::is_execution_policy::value&& hpx::traits::is_iterator::value&& 
hpx::traits::is_iterator::value)> + HPX_DEPRECATED_V(1, 7, + "hpx::parallel::uninitialized_move is deprecated, use " + "hpx::uninitialized_move " + "instead") typename util::detail::algorithm_result::type - uninitialized_move( - ExPolicy&& policy, FwdIter1 first, FwdIter1 last, FwdIter2 dest) + uninitialized_move( + ExPolicy&& policy, FwdIter1 first, FwdIter1 last, FwdIter2 dest) { static_assert((hpx::traits::is_forward_iterator::value), "Required at least forward iterator."); static_assert((hpx::traits::is_forward_iterator::value), "Requires at least forward iterator."); - return detail::uninitialized_move().call( - std::forward(policy), first, last, dest); +#if defined(HPX_GCC_VERSION) && HPX_GCC_VERSION >= 100000 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + return parallel::util::get_second_element( + detail::uninitialized_move< + parallel::util::in_out_result>() + .call(std::forward(policy), first, last, dest)); +#if defined(HPX_GCC_VERSION) && HPX_GCC_VERSION >= 100000 +#pragma GCC diagnostic pop +#endif } ///////////////////////////////////////////////////////////////////////////// @@ -240,11 +456,11 @@ namespace hpx { namespace parallel { inline namespace v1 { // provide our own implementation of std::uninitialized_move_n as some // versions of MSVC horribly fail at compiling it for some types T template - std::pair std_uninitialized_move_n( + util::in_out_result std_uninitialized_move_n( InIter1 first, std::size_t count, InIter2 d_first) { - typedef - typename std::iterator_traits::value_type value_type; + using value_type = + typename std::iterator_traits::value_type; InIter2 current = d_first; try @@ -254,7 +470,7 @@ namespace hpx { namespace parallel { inline namespace v1 { ::new (std::addressof(*current)) value_type(std::move(*first)); } - return std::make_pair(first, current); + return util::in_out_result{first, current}; } catch (...) 
{ @@ -353,28 +569,164 @@ namespace hpx { namespace parallel { inline namespace v1 { HPX_CONCEPT_REQUIRES_(hpx::is_execution_policy::value&& hpx::traits::is_iterator::value&& hpx::traits::is_iterator::value)> + HPX_DEPRECATED_V(1, 7, + "hpx::parallel::uninitialized_move_n is deprecated, use " + "hpx::uninitialized_move_n " + "instead") typename util::detail::algorithm_result>::type - uninitialized_move_n( - ExPolicy&& policy, FwdIter1 first, Size count, FwdIter2 dest) + std::pair>::type + uninitialized_move_n( + ExPolicy&& policy, FwdIter1 first, Size count, FwdIter2 dest) { - static_assert((hpx::traits::is_forward_iterator::value), + static_assert(hpx::traits::is_forward_iterator::value, "Required at least forward iterator."); - static_assert((hpx::traits::is_forward_iterator::value), + static_assert(hpx::traits::is_forward_iterator::value, "Requires at least forward iterator."); // if count is representing a negative value, we do nothing if (detail::is_negative(count)) { - return hpx::util::make_tagged_pair( - util::detail::algorithm_result>::get(std::make_pair(first, - dest))); + return parallel::util::detail::algorithm_result>::get(std::make_pair(first, + dest)); } - return hpx::util::make_tagged_pair( - detail::uninitialized_move_n>().call( - std::forward(policy), first, std::size_t(count), - dest)); +#if defined(HPX_GCC_VERSION) && HPX_GCC_VERSION >= 100000 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + return parallel::util::get_pair( + parallel::v1::detail::uninitialized_move_n< + parallel::util::in_out_result>() + .call(std::forward(policy), first, std::size_t(count), + dest)); +#if defined(HPX_GCC_VERSION) && HPX_GCC_VERSION >= 100000 +#pragma GCC diagnostic pop +#endif } }}} // namespace hpx::parallel::v1 + +namespace hpx { + /////////////////////////////////////////////////////////////////////////// + // DPO for hpx::uninitialized_move + HPX_INLINE_CONSTEXPR_VARIABLE struct uninitialized_move_t final + 
: hpx::functional::tag_fallback + { + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value + )> + // clang-format on + friend FwdIter tag_fallback_dispatch( + hpx::uninitialized_move_t, InIter first, InIter last, FwdIter dest) + { + static_assert(hpx::traits::is_input_iterator::value, + "Required at least input iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return parallel::util::get_second_element( + hpx::parallel::v1::detail::uninitialized_move< + parallel::util::in_out_result>() + .call(hpx::execution::seq, first, last, dest)); + } + + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_forward_iterator::value + )> + // clang-format on + friend typename parallel::util::detail::algorithm_result::type + tag_fallback_dispatch(hpx::uninitialized_move_t, ExPolicy&& policy, + FwdIter1 first, FwdIter1 last, FwdIter2 dest) + { + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return parallel::util::get_second_element( + hpx::parallel::v1::detail::uninitialized_move< + parallel::util::in_out_result>() + .call(std::forward(policy), first, last, dest)); + } + + } uninitialized_move{}; + + /////////////////////////////////////////////////////////////////////////// + // DPO for hpx::uninitialized_move_n + HPX_INLINE_CONSTEXPR_VARIABLE struct uninitialized_move_n_t final + : hpx::functional::tag_fallback + { + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value + )> + // clang-format on + friend std::pair tag_fallback_dispatch( + hpx::uninitialized_move_n_t, InIter first, Size count, FwdIter dest) + { + static_assert(hpx::traits::is_input_iterator::value, + "Required at least input iterator."); + 
static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + // if count is representing a negative value, we do nothing + if (hpx::parallel::v1::detail::is_negative(count)) + { + return std::make_pair(first, dest); + } + + return parallel::util::get_pair( + hpx::parallel::v1::detail::uninitialized_move_n< + parallel::util::in_out_result>() + .call( + hpx::execution::seq, first, std::size_t(count), dest)); + } + + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_forward_iterator::value + )> + // clang-format on + friend typename parallel::util::detail::algorithm_result>::type + tag_fallback_dispatch(hpx::uninitialized_move_n_t, ExPolicy&& policy, + FwdIter1 first, Size count, FwdIter2 dest) + { + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + // if count is representing a negative value, we do nothing + if (hpx::parallel::v1::detail::is_negative(count)) + { + return parallel::util::detail::algorithm_result>::get(std::pair(first, dest)); + } + + return parallel::util::get_pair( + hpx::parallel::v1::detail::uninitialized_move_n< + parallel::util::in_out_result>() + .call(std::forward(policy), first, + std::size_t(count), dest)); + } + + } uninitialized_move_n{}; +} // namespace hpx + +#endif // DOXYGEN diff --git a/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp b/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp index bdd409cfc887..c6e0c50b9925 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp @@ -8,4 +8,5 @@ #include +#include #include diff --git a/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp 
b/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp index 58704da6c776..6e2af5078553 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp @@ -87,12 +87,27 @@ namespace hpx { namespace parallel { namespace util { }; /////////////////////////////////////////////////////////////////////// + template + std::pair get_pair(util::in_out_result&& p) + { + return std::pair{p.in, p.out}; + } + template O get_second_element(util::in_out_result&& p) { return p.out; } + template + hpx::future> get_pair( + hpx::future>&& f) + { + return hpx::make_future>(std::move(f), [](util::in_out_result&& p) { + return std::pair{p.in, p.out}; + }); + } + template hpx::future get_second_element( hpx::future>&& f) diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp index e7cfbbd728d1..7eaf180f1479 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp @@ -33,7 +33,7 @@ void test_uninitialized_move(ExPolicy&& policy, IteratorTag) std::vector c(10007); std::vector d(c.size()); std::iota(std::begin(c), std::end(c), std::rand()); - hpx::parallel::uninitialized_move(std::forward(policy), + hpx::uninitialized_move(std::forward(policy), iterator(std::begin(c)), iterator(std::end(c)), std::begin(d)); std::size_t count = 0; @@ -57,7 +57,7 @@ void test_uninitialized_move_async(ExPolicy&& p, IteratorTag) std::iota(std::begin(c), std::end(c), std::rand()); hpx::future f = - hpx::parallel::uninitialized_move(std::forward(p), + hpx::uninitialized_move(std::forward(p), iterator(std::begin(c)), iterator(std::end(c)), std::begin(d)); f.wait(); @@ -92,7 +92,7 @@ void test_uninitialized_move_exception(ExPolicy policy, IteratorTag) bool 
caught_exception = false; try { - hpx::parallel::uninitialized_move(policy, + hpx::uninitialized_move(policy, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -133,7 +133,7 @@ void test_uninitialized_move_exception_async(ExPolicy p, IteratorTag) bool returned_from_algorithm = false; try { - hpx::future f = hpx::parallel::uninitialized_move(p, + hpx::future f = hpx::uninitialized_move(p, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -182,7 +182,7 @@ void test_uninitialized_move_bad_alloc(ExPolicy policy, IteratorTag) bool caught_bad_alloc = false; try { - hpx::parallel::uninitialized_move(policy, + hpx::uninitialized_move(policy, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -223,7 +223,7 @@ void test_uninitialized_move_bad_alloc_async(ExPolicy p, IteratorTag) bool returned_from_algorithm = false; try { - hpx::future f = hpx::parallel::uninitialized_move(p, + hpx::future f = hpx::uninitialized_move(p, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp index 7fee57890e72..d25f84398a66 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp @@ -32,7 +32,7 @@ void test_uninitialized_move_n(ExPolicy policy, IteratorTag) std::vector d(c.size()); std::iota(std::begin(c), std::end(c), std::rand()); - hpx::parallel::uninitialized_move_n( + hpx::uninitialized_move_n( policy, iterator(std::begin(c)), c.size(), std::begin(d)); std::size_t count = 0; @@ -55,7 +55,7 @@ void test_uninitialized_move_n_async(ExPolicy p, IteratorTag) std::vector d(c.size()); std::iota(std::begin(c), std::end(c), std::rand()); - auto f = hpx::parallel::uninitialized_move_n( + auto f = 
hpx::uninitialized_move_n( p, iterator(std::begin(c)), c.size(), std::begin(d)); f.wait(); @@ -109,7 +109,7 @@ void test_uninitialized_move_n_exception(ExPolicy policy, IteratorTag) bool caught_exception = false; try { - hpx::parallel::uninitialized_move_n(policy, + hpx::uninitialized_move_n(policy, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -150,7 +150,7 @@ void test_uninitialized_move_n_exception_async(ExPolicy p, IteratorTag) bool returned_from_algorithm = false; try { - auto f = hpx::parallel::uninitialized_move_n(p, + auto f = hpx::uninitialized_move_n(p, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -220,7 +220,7 @@ void test_uninitialized_move_n_bad_alloc(ExPolicy policy, IteratorTag) bool caught_bad_alloc = false; try { - hpx::parallel::uninitialized_move_n(policy, + hpx::uninitialized_move_n(policy, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) @@ -261,7 +261,7 @@ void test_uninitialized_move_n_bad_alloc_async(ExPolicy p, IteratorTag) bool returned_from_algorithm = false; try { - auto f = hpx::parallel::uninitialized_move_n(p, + auto f = hpx::uninitialized_move_n(p, decorated_iterator(std::begin(c), [&throw_after]() { if (throw_after-- == 0) From b8edb51c1082b13a5b5d461145ce372c6a023261 Mon Sep 17 00:00:00 2001 From: targetakhil Date: Sat, 12 Jun 2021 11:52:58 +0530 Subject: [PATCH 08/58] add container overloads for uninitialized_move and tests --- libs/parallelism/algorithms/CMakeLists.txt | 1 + .../algorithms/uninitialized_move.hpp | 77 ++- .../uninitialized_move.hpp | 507 ++++++++++++++++++ .../unit/container_algorithms/CMakeLists.txt | 2 + .../uninitialized_move_n_range.cpp | 153 ++++++ .../uninitialized_move_range.cpp | 493 +++++++++++++++++ 6 files changed, 1232 insertions(+), 1 deletion(-) create mode 100644 libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp create mode 100644 
libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_n_range.cpp create mode 100644 libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_range.cpp diff --git a/libs/parallelism/algorithms/CMakeLists.txt b/libs/parallelism/algorithms/CMakeLists.txt index 6644db784885..8cb2dc63226c 100644 --- a/libs/parallelism/algorithms/CMakeLists.txt +++ b/libs/parallelism/algorithms/CMakeLists.txt @@ -121,6 +121,7 @@ set(algorithms_headers hpx/parallel/container_algorithms/stable_sort.hpp hpx/parallel/container_algorithms/transform.hpp hpx/parallel/container_algorithms/transform_reduce.hpp + hpx/parallel/container_algorithms/uninitialized_move.hpp hpx/parallel/container_algorithms/unique.hpp hpx/parallel/container_memory.hpp hpx/parallel/container_numeric.hpp diff --git a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp index 35f83cdb365f..74fc10a1a7e0 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp @@ -216,6 +216,7 @@ namespace hpx { #include #include #include +#include #include #include #include @@ -237,6 +238,34 @@ namespace hpx { namespace parallel { inline namespace v1 { namespace detail { /// \cond NOINTERNAL + /////////////////////////////////////////////////////////////////////// + template + util::in_out_result sequential_uninitialized_move( + InIter1 first, FwdIter2 dest, Cond cond) + { + using value_type = + typename std::iterator_traits::value_type; + + FwdIter2 current = dest; + try + { + for (/* */; cond(first, current); (void) ++first, ++current) + { + ::new (std::addressof(*current)) + value_type(std::move(*first)); + } + return util::in_out_result{first, current}; + } + catch (...) 
+ { + for (/* */; dest != current; ++dest) + { + (*dest).~value_type(); + } + throw; + } + } + // provide our own implementation of std::uninitialized_move as some // versions of MSVC horribly fail at compiling it for some types T template @@ -355,7 +384,10 @@ namespace hpx { namespace parallel { inline namespace v1 { static util::in_out_result sequential( ExPolicy, InIter1 first, Sent last, FwdIter2 dest) { - return std_uninitialized_move(first, last, dest); + return sequential_uninitialized_move(first, dest, + [last](InIter1 first, FwdIter2 current) -> bool { + return first != last; + }); } template + struct uninitialized_move_sent + : public detail::algorithm, + IterPair> + { + uninitialized_move_sent() + : uninitialized_move_sent::algorithm("uninitialized_move_sent") + { + } + + template + static util::in_out_result sequential(ExPolicy, + InIter1 first, Sent1 last, FwdIter2 dest, Sent2 last_d) + { + return sequential_uninitialized_move(first, dest, + [last, last_d](InIter1 first, FwdIter2 current) -> bool { + return !(first == last || current == last_d); + }); + } + + template + static typename util::detail::algorithm_result>::type + parallel(ExPolicy&& policy, Iter first, Sent1 last, FwdIter2 dest, + Sent2 last_d) + { + std::size_t dist1 = detail::distance(first, last); + std::size_t dist2 = detail::distance(dest, last_d); + std::size_t dist = dist1 <= dist2 ? 
dist1 : dist2; + + return parallel_sequential_uninitialized_move_n( + std::forward(policy), first, dist, dest); + } + }; + /// \endcond + } // namespace detail + ///////////////////////////////////////////////////////////////////////////// // uninitialized_move_n namespace detail { diff --git a/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp new file mode 100644 index 000000000000..3fbca3db0073 --- /dev/null +++ b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp @@ -0,0 +1,507 @@ +// Copyright (c) 2020 ETH Zurich +// Copyright (c) 2014 Grant Mercer +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +/// \file parallel/container_algorithms/uninitialized_move.hpp + +#pragma once + +#if defined(DOXYGEN) + +namespace hpx { namespace ranges { + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest. If an exception is + /// thrown during the copy operation, the function has no effects. + /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam InIter The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Sent1 The type of the source sentinel (deduced). This + /// sentinel type must be a sentinel for InIter. + /// \tparam FwdIter The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// \tparam Sent2 The type of the destination sentinel (deduced). This + /// sentinel type must be a sentinel for FwdIter.
+ /// + /// \param first1 Refers to the beginning of the sequence of elements + /// that will be copied from + /// \param last1 Refers to sentinel value denoting the end of the + /// sequence of elements the algorithm will be applied to. + /// \param first2 Refers to the beginning of the destination range. + /// \param last2 Refers to sentinel value denoting the end of the + /// second range the algorithm will be applied to. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// without an execution policy object will execute in sequential order in + /// the calling thread. + /// + /// \returns The \a uninitialized_copy algorithm returns an + /// \a in_out_result. + /// The \a uninitialized_copy algorithm returns an input iterator + /// to one past the last element copied from and the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + hpx::parallel::util::in_out_result uninitialized_copy( + InIter first1, Sent1 last1, FwdIter first2, Sent2 last2); + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest. If an exception is + /// thrown during the copy operation, the function has no effects. + /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced). + /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam FwdIter1 The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Sent1 The type of the source sentinel (deduced). This + /// sentinel type must be a sentinel for FwdIter1. + /// \tparam FwdIter2 The type of the iterator representing the + /// destination range (deduced).
+ /// This iterator type must meet the requirements of a + /// forward iterator. + /// \tparam Sent2 The type of the destination sentinel (deduced). This + /// sentinel type must be a sentinel for FwdIter2. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param first1 Refers to the beginning of the sequence of elements + /// that will be copied from + /// \param last1 Refers to sentinel value denoting the end of the + /// sequence of elements the algorithm will be applied. + /// \param first2 Refers to the beginning of the destination range. + /// \param last2 Refers to sentinel value denoting the end of the + /// second range the algorithm will be applied to. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a sequenced_policy + /// execute in sequential order in the calling thread. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy algorithm returns a + /// \a hpx::future>, if the + /// execution policy is of type \a sequenced_task_policy + /// or \a parallel_task_policy and + /// returns \a in_out_result otherwise. + /// The \a uninitialized_copy algorithm returns an input iterator + /// to one past the last element copied from and the output + /// iterator to the element in the destination range, one past + /// the last element copied.
+ /// + template + typename parallel::util::detail::algorithm_result>::type + uninitialized_copy(ExPolicy&& policy, FwdIter1 first1, Sent1 last1, + FwdIter2 first2, Sent2 last2); + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest. If an exception is + /// thrown during the copy operation, the function has no effects. + /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam Rng1 The type of the source range used (deduced). + /// The iterators extracted from this range type must + /// meet the requirements of an input iterator. + /// \tparam Rng2 The type of the destination range used (deduced). + /// The iterators extracted from this range type must + /// meet the requirements of a forward iterator. + /// + /// \param rng1 Refers to the range from which the elements + /// will be copied + /// \param rng2 Refers to the range to which the elements + /// will be copied + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// without an execution policy object will execute in sequential order in + /// the calling thread. + /// + /// \returns The \a uninitialized_copy algorithm returns an + /// \a in_out_result + /// ::iterator_type, typename hpx::traits::range_traits + /// ::iterator_type>. + /// The \a uninitialized_copy algorithm returns an input iterator + /// to one past the last element copied from and the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + hpx::parallel::util::in_out_result< + typename hpx::traits::range_traits::iterator_type, + typename hpx::traits::range_traits::iterator_type> + uninitialized_copy(Rng1&& rng1, Rng2&& rng2); + + /// Copies the elements in the range, defined by [first, last), to an + /// uninitialized memory area beginning at \a dest.
If an exception is + /// thrown during the copy operation, the function has no effects. + /// + /// \note Complexity: Performs exactly \a last - \a first assignments. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced). + /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam Rng1 The type of the source range used (deduced). + /// The iterators extracted from this range type must + /// meet the requirements of an input iterator. + /// \tparam Rng2 The type of the destination range used (deduced). + /// The iterators extracted from this range type must + /// meet the requirements of a forward iterator. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param rng1 Refers to the range from which the elements + /// will be copied + /// \param rng2 Refers to the range to which the elements + /// will be copied + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a sequenced_policy + /// execute in sequential order in the calling thread. + /// + /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// with an execution policy object of type \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy algorithm returns a + /// \a hpx::future wrapping the result type below, if the + /// execution policy is of type \a sequenced_task_policy + /// or \a parallel_task_policy and + /// returns \a in_out_result< + /// typename hpx::traits::range_traits<Rng1>::iterator_type + /// , typename hpx::traits::range_traits<Rng2>::iterator_type> + /// otherwise. 
The \a uninitialized_copy algorithm returns the + /// input iterator to one past the last element copied from and + /// the output iterator to the element in the destination range, + /// one past the last element copied. + /// + template + typename parallel::util::detail::algorithm_result::iterator_type, + typename hpx::traits::range_traits::iterator_type>>::type + uninitialized_copy(ExPolicy&& policy, Rng1&& rng1, Rng2&& rng2); + + /// Copies the elements in the range [first1, first1 + count), starting from + /// first1 and proceeding to first1 + count - 1, to another range beginning + /// at first2. If an exception is thrown during the copy operation, the + /// function has no effects. + /// + /// \note Complexity: Performs at most \a count assignments. + /// + /// \tparam InIter The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Size The type of the argument specifying the number of + /// elements to copy. + /// \tparam FwdIter The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// \tparam Sent2 The type of the destination sentinel (deduced). This + /// sentinel type must be a sentinel for FwdIter. + /// + /// \param first1 Refers to the beginning of the sequence of elements + /// that will be copied from + /// \param count Refers to the number of elements starting at + /// \a first1 the algorithm will be applied to. + /// \param first2 Refers to the beginning of the destination range. + /// \param last2 Refers to sentinel value denoting the end of the + /// second range the algorithm will be applied to. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked without an execution policy object + /// execute in sequential order in the + /// calling thread. 
+ /// + /// \returns The \a uninitialized_copy_n algorithm returns + /// \a in_out_result<InIter, FwdIter>. + /// The \a uninitialized_copy_n algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied. + /// + template + hpx::parallel::util::in_out_result uninitialized_copy_n( + InIter first1, Size count, FwdIter first2, Sent2 last2); + + /// Copies the elements in the range [first1, first1 + count), starting from + /// first1 and proceeding to first1 + count - 1, to another range beginning + /// at first2. If an exception is thrown during the copy operation, the + /// function has no effects. + /// + /// \note Complexity: Performs at most \a count assignments. + /// + /// \tparam ExPolicy The type of the execution policy to use (deduced). + /// It describes the manner in which the execution + /// of the algorithm may be parallelized and the manner + /// in which it executes the assignments. + /// \tparam FwdIter1 The type of the source iterators used (deduced). + /// This iterator type must meet the requirements of an + /// input iterator. + /// \tparam Size The type of the argument specifying the number of + /// elements to copy. + /// \tparam FwdIter2 The type of the iterator representing the + /// destination range (deduced). + /// This iterator type must meet the requirements of a + /// forward iterator. + /// \tparam Sent2 The type of the destination sentinel (deduced). This + /// sentinel type must be a sentinel for FwdIter2. + /// + /// \param policy The execution policy to use for the scheduling of + /// the iterations. + /// \param first1 Refers to the beginning of the sequence of elements + /// that will be copied from + /// \param count Refers to the number of elements starting at + /// \a first1 the algorithm will be applied to. + /// \param first2 Refers to the beginning of the destination range. 
+ /// \param last2 Refers to sentinel value denoting the end of the + /// second range the algorithm will be applied to. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a sequenced_policy execute in sequential order in the + /// calling thread. + /// + /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// invoked with an execution policy object of type + /// \a parallel_policy or + /// \a parallel_task_policy are permitted to execute in an + /// unordered fashion in unspecified threads, and indeterminately sequenced + /// within each thread. + /// + /// \returns The \a uninitialized_copy_n algorithm returns a + /// \a hpx::future<in_out_result<FwdIter1, FwdIter2>> if the + /// execution policy is of type \a sequenced_task_policy or + /// \a parallel_task_policy and + /// returns \a in_out_result<FwdIter1, FwdIter2> otherwise. + /// The \a uninitialized_copy_n algorithm returns the output + /// iterator to the element in the destination range, one past + /// the last element copied. 
+ /// + template + typename parallel::util::detail::algorithm_result>::type + uninitialized_copy_n(ExPolicy&& policy, FwdIter1 first1, Size count, + FwdIter2 first2, Sent2 last2); +}} // namespace hpx::ranges +#else + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace hpx { namespace ranges { + HPX_INLINE_CONSTEXPR_VARIABLE struct uninitialized_move_t final + : hpx::functional::tag_fallback + { + private: + // clang-format off + template ::value && + hpx::traits::is_sentinel_for::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_sentinel_for::value + )> + // clang-format on + friend hpx::parallel::util::in_out_result + tag_fallback_dispatch(hpx::ranges::uninitialized_move_t, InIter first1, + Sent1 last1, FwdIter first2, Sent2 last2) + { + static_assert(hpx::traits::is_input_iterator::value, + "Requires at least input iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return hpx::parallel::v1::detail::uninitialized_move_sent< + parallel::util::in_out_result>() + .call(hpx::execution::seq, first1, last1, first2, last2); + } + + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_sentinel_for::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_sentinel_for::value + )> + // clang-format on + friend typename parallel::util::detail::algorithm_result>::type + tag_fallback_dispatch(hpx::ranges::uninitialized_move_t, + ExPolicy&& policy, FwdIter1 first1, Sent1 last1, FwdIter2 first2, + Sent2 last2) + { + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return hpx::parallel::v1::detail::uninitialized_move_sent< + parallel::util::in_out_result>() + 
.call(std::forward(policy), first1, last1, first2, + last2); + } + + // clang-format off + template ::value && + hpx::traits::is_range::value + )> + // clang-format on + friend hpx::parallel::util::in_out_result< + typename hpx::traits::range_traits::iterator_type, + typename hpx::traits::range_traits::iterator_type> + tag_fallback_dispatch( + hpx::ranges::uninitialized_move_t, Rng1&& rng1, Rng2&& rng2) + { + using iterator_type1 = + typename hpx::traits::range_traits::iterator_type; + using iterator_type2 = + typename hpx::traits::range_traits::iterator_type; + + static_assert(hpx::traits::is_input_iterator::value, + "Requires at least input iterator."); + + static_assert( + hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return hpx::parallel::v1::detail::uninitialized_move_sent< + parallel::util::in_out_result>() + .call(hpx::execution::seq, std::begin(rng1), std::end(rng1), + std::begin(rng2), std::end(rng2)); + } + + // clang-format off + template ::value && + hpx::traits::is_range::value && + hpx::traits::is_range::value + )> + // clang-format on + friend typename parallel::util::detail::algorithm_result::iterator_type, + typename hpx::traits::range_traits::iterator_type>>::type + tag_fallback_dispatch(hpx::ranges::uninitialized_move_t, + ExPolicy&& policy, Rng1&& rng1, Rng2&& rng2) + { + using iterator_type1 = + typename hpx::traits::range_traits::iterator_type; + using iterator_type2 = + typename hpx::traits::range_traits::iterator_type; + + static_assert( + hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + static_assert( + hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + return hpx::parallel::v1::detail::uninitialized_move_sent< + parallel::util::in_out_result>() + .call(std::forward(policy), std::begin(rng1), + std::end(rng1), std::begin(rng2), std::end(rng2)); + } + } uninitialized_move{}; + + HPX_INLINE_CONSTEXPR_VARIABLE struct 
uninitialized_move_n_t final + : hpx::functional::tag_fallback + { + private: + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_sentinel_for::value + )> + // clang-format on + friend hpx::parallel::util::in_out_result + tag_fallback_dispatch(hpx::ranges::uninitialized_move_n_t, + InIter first1, Size count, FwdIter first2, Sent2 last2) + { + static_assert(hpx::traits::is_input_iterator::value, + "Requires at least input iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + std::size_t d = parallel::v1::detail::distance(first2, last2); + return hpx::parallel::v1::detail::uninitialized_move_n< + parallel::util::in_out_result>() + .call(hpx::execution::seq, first1, count <= d ? count : d, + first2); + } + + // clang-format off + template ::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_forward_iterator::value && + hpx::traits::is_sentinel_for::value + )> + // clang-format on + friend typename parallel::util::detail::algorithm_result>::type + tag_fallback_dispatch(hpx::ranges::uninitialized_move_n_t, + ExPolicy&& policy, FwdIter1 first1, Size count, FwdIter2 first2, + Sent2 last2) + { + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + static_assert(hpx::traits::is_forward_iterator::value, + "Requires at least forward iterator."); + + std::size_t d = parallel::v1::detail::distance(first2, last2); + return hpx::parallel::v1::detail::uninitialized_move_n< + parallel::util::in_out_result>() + .call(std::forward(policy), first1, + count <= d ? 
count : d, first2); + } + } uninitialized_move_n{}; +}} // namespace hpx::ranges + +#endif diff --git a/libs/parallelism/algorithms/tests/unit/container_algorithms/CMakeLists.txt b/libs/parallelism/algorithms/tests/unit/container_algorithms/CMakeLists.txt index 3f13ab5bf70f..a588b0a59fe8 100644 --- a/libs/parallelism/algorithms/tests/unit/container_algorithms/CMakeLists.txt +++ b/libs/parallelism/algorithms/tests/unit/container_algorithms/CMakeLists.txt @@ -94,6 +94,8 @@ set(tests transform_reduce_binary_exception_range transform_reduce_binary_range transform_reduce_range + uninitialized_move_range + uninitialized_move_n_range unique_range unique_copy_range ) diff --git a/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_n_range.cpp b/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_n_range.cpp new file mode 100644 index 000000000000..fae0b0549ce7 --- /dev/null +++ b/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_n_range.cpp @@ -0,0 +1,153 @@ +// Copyright (c) 2014 Grant Mercer +// Copyright (c) 2015 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "test_utils.hpp" + +//////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move_n_sent(IteratorTag) +{ + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + hpx::ranges::uninitialized_move_n(std::begin(c), sent_len, std::begin(d), + sentinel{*(std::begin(d) + sent_len)}); + + std::size_t count = 0; + // sent_len < c.size() keeps the sentinel lookup in range; only the first + // sent_len elements are moved, so compare exactly those + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + + HPX_TEST_EQ(count, sent_len); +} + +template +void test_uninitialized_move_n_sent(ExPolicy&& policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + hpx::ranges::uninitialized_move_n(policy, std::begin(c), sent_len, + std::begin(d), sentinel{*(std::begin(d) + sent_len)}); + + std::size_t count = 0; + // sent_len < c.size() keeps the sentinel lookup in range; only the first + // sent_len elements are moved, so compare exactly those + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + + HPX_TEST_EQ(count, sent_len); +} + +template +void
test_uninitialized_move_n_sent_async(ExPolicy&& p, IteratorTag) +{ + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + auto f = hpx::ranges::uninitialized_move_n(p, std::begin(c), sent_len, + std::begin(d), sentinel{*(std::begin(d) + sent_len)}); + f.wait(); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, sent_len); +} + +template +void test_uninitialized_move_n_sent() +{ + using namespace hpx::execution; + + test_uninitialized_move_n_sent(IteratorTag()); + + test_uninitialized_move_n_sent(seq, IteratorTag()); + test_uninitialized_move_n_sent(par, IteratorTag()); + test_uninitialized_move_n_sent(par_unseq, IteratorTag()); + + test_uninitialized_move_n_sent_async(seq(task), IteratorTag()); + test_uninitialized_move_n_sent_async(par(task), IteratorTag()); +} + +void uninitialized_move_n_sent_test() +{ + test_uninitialized_move_n_sent(); + test_uninitialized_move_n_sent(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + uninitialized_move_n_sent_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const
cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} diff --git a/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_range.cpp b/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_range.cpp new file mode 100644 index 000000000000..f8c736b35c12 --- /dev/null +++ b/libs/parallelism/algorithms/tests/unit/container_algorithms/uninitialized_move_range.cpp @@ -0,0 +1,493 @@ +// Copyright (c) 2014 Grant Mercer +// Copyright (c) 2015 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "test_utils.hpp" + +//////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move_sent(IteratorTag) +{ + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + hpx::ranges::uninitialized_move(std::begin(c), + sentinel{*(std::begin(c) + sent_len)}, std::begin(d), + sentinel{*(std::begin(d) + sent_len)}); + + std::size_t count = 0; + // sent_len < c.size() keeps both sentinel lookups in range; only the first + // sent_len elements are moved, so compare exactly those + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + + HPX_TEST_EQ(count, sent_len); +} + +template 
+void test_uninitialized_move_sent(ExPolicy&& policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + hpx::ranges::uninitialized_move(policy, std::begin(c), + sentinel{*(std::begin(c) + sent_len)}, std::begin(d), + sentinel{*(std::begin(d) + sent_len)}); + + std::size_t count = 0; + // sent_len < c.size() keeps both sentinel lookups in range; only the first + // sent_len elements are moved, so compare exactly those + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + + HPX_TEST_EQ(count, sent_len); +} + +template +void test_uninitialized_move_sent_async(ExPolicy&& p, IteratorTag) +{ + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::rbegin(d)); + std::size_t sent_len = (std::rand() % (c.size() - 1)) + 1; + auto f = hpx::ranges::uninitialized_move(p, std::begin(c), + sentinel{*(std::begin(c) + sent_len)}, std::begin(d), + sentinel{*(std::begin(d) + sent_len)}); + f.wait(); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::begin(c) + sent_len, std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, sent_len); +} + +template +void test_uninitialized_move_sent() +{ + using namespace hpx::execution; + + test_uninitialized_move_sent(IteratorTag()); + + test_uninitialized_move_sent(seq, IteratorTag()); + test_uninitialized_move_sent(par, IteratorTag()); + test_uninitialized_move_sent(par_unseq, IteratorTag()); + + test_uninitialized_move_sent_async(seq(task), IteratorTag()); +
test_uninitialized_move_sent_async(par(task), IteratorTag()); +} + +void uninitialized_move_sent_test() +{ + test_uninitialized_move_sent(); + test_uninitialized_move_sent(); +} + +//////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move(IteratorTag) +{ + using test_vector = + typename test::test_container, IteratorTag>; + + test_vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + hpx::ranges::uninitialized_move(c, d); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, d.size()); +} + +template +void test_uninitialized_move(ExPolicy&& policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + using test_vector = + typename test::test_container, IteratorTag>; + + test_vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + hpx::ranges::uninitialized_move(policy, c, d); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, d.size()); +} + +template +void test_uninitialized_move_async(ExPolicy&& p, IteratorTag) +{ + using test_vector = + typename test::test_container, IteratorTag>; + + test_vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + auto f = hpx::ranges::uninitialized_move(p, c, d); + f.wait(); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, d.size()); +} + +template +void 
test_uninitialized_move() +{ + using namespace hpx::execution; + + test_uninitialized_move(IteratorTag()); + + test_uninitialized_move(seq, IteratorTag()); + test_uninitialized_move(par, IteratorTag()); + test_uninitialized_move(par_unseq, IteratorTag()); + + test_uninitialized_move_async(seq(task), IteratorTag()); + test_uninitialized_move_async(par(task), IteratorTag()); +} + +void uninitialized_move_test() +{ + test_uninitialized_move(); + test_uninitialized_move(); +} + +/////////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move_exception(IteratorTag) +{ + using base_iterator = typename std::vector::iterator; + using decorated_iterator = + typename test::decorated_iterator; + using test_vector = + typename test::test_container, IteratorTag>; + + std::vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + bool caught_exception = false; + try + { + hpx::ranges::uninitialized_move( + hpx::util::make_iterator_range( + decorated_iterator( + std::begin(c), []() { throw std::runtime_error("test"); }), + decorated_iterator(std::end(c))), + d); + HPX_TEST(false); + } + catch (hpx::exception_list const& e) + { + caught_exception = true; + test::test_num_exceptions::call(hpx::execution::seq, e); + } + catch (...) 
+ { + HPX_TEST(false); + } + + HPX_TEST(caught_exception); +} + +template +void test_uninitialized_move_exception(ExPolicy&& policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + using base_iterator = typename std::vector::iterator; + using decorated_iterator = + typename test::decorated_iterator; + using test_vector = + typename test::test_container, IteratorTag>; + + std::vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + bool caught_exception = false; + try + { + hpx::ranges::uninitialized_move(policy, + hpx::util::make_iterator_range( + decorated_iterator( + std::begin(c), []() { throw std::runtime_error("test"); }), + decorated_iterator(std::end(c))), + d); + HPX_TEST(false); + } + catch (hpx::exception_list const& e) + { + caught_exception = true; + test::test_num_exceptions::call(policy, e); + } + catch (...) + { + HPX_TEST(false); + } + + HPX_TEST(caught_exception); +} + +template +void test_uninitialized_move_exception_async(ExPolicy&& p, IteratorTag) +{ + using base_iterator = typename std::vector::iterator; + using decorated_iterator = + typename test::decorated_iterator; + using test_vector = + typename test::test_container, IteratorTag>; + + std::vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + bool caught_exception = false; + bool returned_from_algorithm = false; + try + { + auto f = hpx::ranges::uninitialized_move(p, + hpx::util::make_iterator_range( + decorated_iterator( + std::begin(c), []() { throw std::runtime_error("test"); }), + decorated_iterator(std::end(c))), + d); + returned_from_algorithm = true; + f.get(); + + HPX_TEST(false); + } + catch (hpx::exception_list const& e) + { + caught_exception = true; + test::test_num_exceptions::call(p, e); + } + catch (...) 
+ { + HPX_TEST(false); + } + + HPX_TEST(caught_exception); + HPX_TEST(returned_from_algorithm); +} + +template +void test_uninitialized_move_exception() +{ + using namespace hpx::execution; + + // If the execution policy object is of type vector_execution_policy, + // std::terminate shall be called. therefore we do not test exceptions + // with a vector execution policy + test_uninitialized_move_exception(seq, IteratorTag()); + test_uninitialized_move_exception(par, IteratorTag()); + + test_uninitialized_move_exception_async(seq(task), IteratorTag()); + test_uninitialized_move_exception_async(par(task), IteratorTag()); +} + +void uninitialized_move_exception_test() +{ + test_uninitialized_move_exception(); + test_uninitialized_move_exception(); +} + +////////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move_bad_alloc(ExPolicy&& policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + using base_iterator = typename std::vector::iterator; + using decorated_iterator = + typename test::decorated_iterator; + using test_vector = + typename test::test_container, IteratorTag>; + + std::vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + bool caught_bad_alloc = false; + try + { + hpx::ranges::uninitialized_move(policy, + hpx::util::make_iterator_range( + decorated_iterator( + std::begin(c), []() { throw std::bad_alloc(); }), + decorated_iterator(std::end(c))), + d); + HPX_TEST(false); + } + catch (std::bad_alloc const&) + { + caught_bad_alloc = true; + } + catch (...) 
+ { + HPX_TEST(false); + } + + HPX_TEST(caught_bad_alloc); +} + +template +void test_uninitialized_move_bad_alloc_async(ExPolicy&& p, IteratorTag) +{ + using base_iterator = typename std::vector::iterator; + using decorated_iterator = + typename test::decorated_iterator; + using test_vector = + typename test::test_container, IteratorTag>; + + std::vector c(10007); + test_vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + bool caught_bad_alloc = false; + bool returned_from_algorithm = false; + try + { + auto f = hpx::ranges::uninitialized_move(p, + hpx::util::make_iterator_range( + decorated_iterator( + std::begin(c), []() { throw std::bad_alloc(); }), + decorated_iterator(std::end(c))), + d); + returned_from_algorithm = true; + f.get(); + + HPX_TEST(false); + } + catch (std::bad_alloc const&) + { + caught_bad_alloc = true; + } + catch (...) + { + HPX_TEST(false); + } + + HPX_TEST(caught_bad_alloc); + HPX_TEST(returned_from_algorithm); +} + +template +void test_uninitialized_move_bad_alloc() +{ + using namespace hpx::execution; + + // If the execution policy object is of type vector_execution_policy, + // std::terminate shall be called. 
therefore we do not test exceptions + // with a vector execution policy + test_uninitialized_move_bad_alloc(seq, IteratorTag()); + test_uninitialized_move_bad_alloc(par, IteratorTag()); + + test_uninitialized_move_bad_alloc_async(seq(task), IteratorTag()); + test_uninitialized_move_bad_alloc_async(par(task), IteratorTag()); +} + +void uninitialized_move_bad_alloc_test() +{ + test_uninitialized_move_bad_alloc(); + test_uninitialized_move_bad_alloc(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + uninitialized_move_test(); + uninitialized_move_sent_test(); + uninitialized_move_exception_test(); + uninitialized_move_bad_alloc_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} From 8ee3a597a33c09d9ce27f2fa3e8d09b7582445be Mon Sep 17 00:00:00 2001 From: targetakhil Date: Sun, 13 Jun 2021 13:28:59 +0530 Subject: [PATCH 09/58] updated documentation --- docs/sphinx/api/public_api.rst | 4 +- .../writing_single_node_hpx_applications.rst | 4 +- .../include/hpx/local/memory.hpp | 2 - .../algorithms/uninitialized_move.hpp | 144 
+++++------------- .../uninitialized_move.hpp | 94 ++++++------ .../include/hpx/parallel/container_memory.hpp | 2 +- 6 files changed, 95 insertions(+), 155 deletions(-) diff --git a/docs/sphinx/api/public_api.rst b/docs/sphinx/api/public_api.rst index 2e701812b74d..f554ee7f7c6f 100644 --- a/docs/sphinx/api/public_api.rst +++ b/docs/sphinx/api/public_api.rst @@ -571,8 +571,8 @@ Functions - :cpp:func:`hpx::parallel::v1::uninitialized_default_construct_n` - :cpp:func:`hpx::parallel::v1::uninitialized_fill` - :cpp:func:`hpx::parallel::v1::uninitialized_fill_n` -- :cpp:func:`hpx::parallel::v1::uninitialized_move` -- :cpp:func:`hpx::parallel::v1::uninitialized_move_n` +- :cpp:func:`hpx::uninitialized_move` +- :cpp:func:`hpx::uninitialized_move_n` - :cpp:func:`hpx::parallel::v1::uninitialized_value_construct` - :cpp:func:`hpx::parallel::v1::uninitialized_value_construct_n` diff --git a/docs/sphinx/manual/writing_single_node_hpx_applications.rst b/docs/sphinx/manual/writing_single_node_hpx_applications.rst index aa85b4407a9f..17d696916004 100644 --- a/docs/sphinx/manual/writing_single_node_hpx_applications.rst +++ b/docs/sphinx/manual/writing_single_node_hpx_applications.rst @@ -740,11 +740,11 @@ Parallel algorithms * Copies an object to an uninitialized area of memory. * ```` * :cppreference-memory:`uninitialized_fill_n` - * * :cpp:func:`hpx::parallel::v1::uninitialized_move` + * * :cpp:func:`hpx::uninitialized_move` * Moves a range of objects to an uninitialized area of memory. * ```` * :cppreference-memory:`uninitialized_move` - * * :cpp:func:`hpx::parallel::v1::uninitialized_move_n` + * * :cpp:func:`hpx::uninitialized_move_n` * Moves a number of objects to an uninitialized area of memory. 
* ```` * :cppreference-memory:`uninitialized_move_n` diff --git a/libs/full/include_local/include/hpx/local/memory.hpp b/libs/full/include_local/include/hpx/local/memory.hpp index ac6f41fe4e72..582f73d749a0 100644 --- a/libs/full/include_local/include/hpx/local/memory.hpp +++ b/libs/full/include_local/include/hpx/local/memory.hpp @@ -16,8 +16,6 @@ namespace hpx { using hpx::parallel::uninitialized_default_construct_n; using hpx::parallel::uninitialized_fill; using hpx::parallel::uninitialized_fill_n; - using hpx::parallel::uninitialized_move; - using hpx::parallel::uninitialized_move_n; using hpx::parallel::uninitialized_value_construct; using hpx::parallel::uninitialized_value_construct_n; } // namespace hpx diff --git a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp index 74fc10a1a7e0..81a308092410 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp @@ -10,10 +10,10 @@ #if defined(DOXYGEN) namespace hpx { - - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -31,21 +31,22 @@ namespace hpx { /// algorithm will be applied to. /// \param dest Refers to the beginning of the destination range. 
/// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// without an execution policy object will execute in sequential order in /// the calling thread. /// - /// \returns The \a uninitialized_copy algorithm returns \a FwdIter. - /// The \a uninitialized_copy algorithm returns the output + /// \returns The \a uninitialized_move algorithm returns \a FwdIter. + /// The \a uninitialized_move algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template - FwdIter uninitialized_copy(InIter first, InIter last, FwdIter dest); + FwdIter uninitialized_move(InIter first, InIter last, FwdIter dest); - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -69,42 +70,39 @@ namespace hpx { /// algorithm will be applied to. /// \param dest Refers to the beginning of the destination range. /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a sequenced_policy /// execute in sequential order in the calling thread. 
/// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a parallel_policy or /// \a parallel_task_policy are permitted to execute in an /// unordered fashion in unspecified threads, and indeterminately sequenced /// within each thread. /// - /// \returns The \a uninitialized_copy algorithm returns a + /// \returns The \a uninitialized_move algorithm returns a /// \a hpx::future, if the execution policy is of type /// \a sequenced_task_policy or /// \a parallel_task_policy and /// returns \a FwdIter2 otherwise. - /// The \a uninitialized_copy algorithm returns the output + /// The \a uninitialized_move algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template typename parallel::util::detail::algorithm_result::type - uninitialized_copy( + uninitialized_move( ExPolicy&& policy, FwdIter1 first, FwdIter1 last, FwdIter2 dest); - /// Copies the elements in the range [first, first + count), starting from + /// Moves the elements in the range [first, first + count), starting from /// first and proceeding to first + count - 1., to another range beginning - /// at dest. If an exception is thrown during the copy operation, the - /// function has no effects. + /// at dest. If an exception is + /// thrown during the initialization, some objects in [first, first + count) + /// are left in a valid but unspecified state. /// - /// \note Complexity: Performs exactly \a count assignments, if - /// count > 0, no assignments otherwise. + /// \note Complexity: Performs exactly \a count movements, if + /// count > 0, no move operations otherwise. /// - /// \tparam ExPolicy The type of the execution policy to use (deduced). 
- /// It describes the manner in which the execution - /// of the algorithm may be parallelized and the manner - /// in which it executes the assignments. /// \tparam FwdIter1 The type of the source iterators used (deduced). /// This iterator type must meet the requirements of an /// input iterator. @@ -115,45 +113,34 @@ namespace hpx { /// This iterator type must meet the requirements of a /// forward iterator. /// - /// \param policy The execution policy to use for the scheduling of - /// the iterations. /// \param first Refers to the beginning of the sequence of elements /// the algorithm will be applied to. /// \param count Refers to the number of elements starting at /// \a first the algorithm will be applied to. /// \param dest Refers to the beginning of the destination range. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a sequenced_policy execute in sequential order in the /// calling thread. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm - /// invoked with an execution policy object of type - /// \a parallel_policy or - /// \a parallel_task_policy are permitted to execute in an - /// unordered fashion in unspecified threads, and indeterminately sequenced - /// within each thread. - /// - /// \returns The \a uninitialized_copy_n algorithm returns a - /// \a hpx::future if the execution policy is of type - /// \a sequenced_task_policy or - /// \a parallel_task_policy and - /// returns \a FwdIter2 otherwise. - /// The \a uninitialized_copy_n algorithm returns the output + /// \returns The \a uninitialized_move_n algorithm + /// returns \a FwdIter2. + /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past /// the last element copied. 
/// template - FwdIter uninitialized_copy_n(InIter first, Size count, FwdIter dest); + FwdIter uninitialized_move_n(InIter first, Size count, FwdIter dest); - /// Copies the elements in the range [first, first + count), starting from + /// Moves the elements in the range [first, first + count), starting from /// first and proceeding to first + count - 1., to another range beginning - /// at dest. If an exception is thrown during the copy operation, the - /// function has no effects. + /// at dest. If an exception is + /// thrown during the initialization, some objects in [first, first + count) + /// are left in a valid but unspecified state. /// - /// \note Complexity: Performs exactly \a count assignments, if - /// count > 0, no assignments otherwise. + /// \note Complexity: Performs exactly \a count movements, if + /// count > 0, no move operations otherwise. /// /// \tparam ExPolicy The type of the execution policy to use (deduced). /// It describes the manner in which the execution @@ -177,31 +164,31 @@ namespace hpx { /// \a first the algorithm will be applied to. /// \param dest Refers to the beginning of the destination range. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a sequenced_policy execute in sequential order in the /// calling thread. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a parallel_policy or /// \a parallel_task_policy are permitted to execute in an /// unordered fashion in unspecified threads, and indeterminately sequenced /// within each thread. 
/// - /// \returns The \a uninitialized_copy_n algorithm returns a + /// \returns The \a uninitialized_move_n algorithm returns a /// \a hpx::future if the execution policy is of type /// \a sequenced_task_policy or /// \a parallel_task_policy and /// returns \a FwdIter2 otherwise. - /// The \a uninitialized_copy_n algorithm returns the output + /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past /// the last element copied. /// template typename parallel::util::detail::algorithm_result::type - uninitialized_copy_n( + uninitialized_move_n( ExPolicy&& policy, FwdIter1 first, Size count, FwdIter2 dest); } // namespace hpx @@ -586,59 +573,6 @@ namespace hpx { namespace parallel { inline namespace v1 { /// \endcond } // namespace detail - /// Moves the elements in the range [first, first + count), starting from - /// first and proceeding to first + count - 1., to another range beginning - /// at dest. If an exception is - /// thrown during the initialization, some objects in [first, first + count) - /// are left in a valid but unspecified state. - /// - /// \note Complexity: Performs exactly \a count movements, if - /// count > 0, no move operations otherwise. - /// - /// \tparam ExPolicy The type of the execution policy to use (deduced). - /// It describes the manner in which the execution - /// of the algorithm may be parallelized and the manner - /// in which it executes the assignments. - /// \tparam FwdIter1 The type of the source iterators used (deduced). - /// This iterator type must meet the requirements of an - /// forward iterator. - /// \tparam Size The type of the argument specifying the number of - /// elements to apply \a f to. - /// \tparam FwdIter2 The type of the iterator representing the - /// destination range (deduced). - /// This iterator type must meet the requirements of a - /// forward iterator. 
- /// - /// \param policy The execution policy to use for the scheduling of - /// the iterations. - /// \param first Refers to the beginning of the sequence of elements - /// the algorithm will be applied to. - /// \param count Refers to the number of elements starting at - /// \a first the algorithm will be applied to. - /// \param dest Refers to the beginning of the destination range. - /// - /// The assignments in the parallel \a uninitialized_move_n algorithm - /// invoked with an execution policy object of type - /// \a sequenced_policy execute in sequential order in the - /// calling thread. - /// - /// The assignments in the parallel \a uninitialized_move_n algorithm - /// invoked with an execution policy object of type - /// \a parallel_policy or - /// \a parallel_task_policy are permitted to execute in an - /// unordered fashion in unspecified threads, and indeterminately sequenced - /// within each thread. - /// - /// \returns The \a uninitialized_move_n algorithm returns a - /// \a hpx::future> if the execution - /// policy is of type \a sequenced_task_policy or - /// \a parallel_task_policy and - /// returns \a std::pair otherwise. - /// The \a uninitialized_move_n algorithm returns the pair of - /// the input iterator to the element past in the source range - /// and an output iterator to the element in the destination - /// range, one past the last element moved. 
- /// template ::value&& diff --git a/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp index 3fbca3db0073..ec79adb3b42c 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp @@ -13,9 +13,10 @@ namespace hpx { namespace ranges { - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -39,24 +40,25 @@ namespace hpx { namespace ranges { /// \param last2 Refers to sentinel value denoting the end of the /// second range the algorithm will be applied to. /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// without an execution policy object will execute in sequential order in /// the calling thread. /// - /// \returns The \a uninitialized_copy algorithm returns an + /// \returns The \a uninitialized_move algorithm returns an /// \a in_out_result. - /// The \a uninitialized_copy algorithm returns an input iterator + /// The \a uninitialized_move algorithm returns an input iterator /// to one past the last element copied from and the output /// iterator to the element in the destination range, one past /// the last element copied. 
/// template - hpx::parallel::util::in_out_result uninitialized_copy( + hpx::parallel::util::in_out_result uninitialized_move( InIter first1, Sent1 last1, FwdIter first2, Sent2 last2); - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -86,22 +88,22 @@ namespace hpx { namespace ranges { /// \param last2 Refers to sentinel value denoting the end of the /// second range the algorithm will be applied to. /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a sequenced_policy /// execute in sequential order in the calling thread. /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a parallel_policy or /// \a parallel_task_policy are permitted to execute in an /// unordered fashion in unspecified threads, and indeterminately sequenced /// within each thread. /// - /// \returns The \a uninitialized_copy algorithm returns a + /// \returns The \a uninitialized_move algorithm returns a /// \a hpx::future>, if the /// execution policy is of type \a sequenced_task_policy /// or \a parallel_task_policy and /// returns \a in_out_result otherwise. 
- /// The \a uninitialized_copy algorithm returns an input iterator + /// The \a uninitialized_move algorithm returns an input iterator /// to one past the last element copied from and the output /// iterator to the element in the destination range, one past /// the last element copied. @@ -110,12 +112,13 @@ namespace hpx { namespace ranges { typename FwdIter2, typename Sent2> typename parallel::util::detail::algorithm_result>::type - uninitialized_copy(ExPolicy&& policy, FwdIter1 first1, Sent1 last1, + uninitialized_move(ExPolicy&& policy, FwdIter1 first1, Sent1 last1, FwdIter2 first2, Sent2 last2); - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -131,15 +134,15 @@ namespace hpx { namespace ranges { /// \param rng2 Refers to the range to which the elements /// will be copied to /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// without an execution policy object will execute in sequential order in /// the calling thread. /// - /// \returns The \a uninitialized_copy algorithm returns an + /// \returns The \a uninitialized_move algorithm returns an /// \a in_out_result /// ::iterator_type, typename hpx::traits::range_traits /// ::iterator_type>. - /// The \a uninitialized_copy algorithm returns an input iterator + /// The \a uninitialized_move algorithm returns an input iterator /// to one past the last element copied from and the output /// iterator to the element in the destination range, one past /// the last element copied. 
@@ -148,11 +151,12 @@ namespace hpx { namespace ranges { hpx::parallel::util::in_out_result< typename hpx::traits::range_traits::iterator_type, typename hpx::traits::range_traits::iterator_type> - uninitialized_copy(Rng1&& rng1, Rng2&& rng2); + uninitialized_move(Rng1&& rng1, Rng2&& rng2); - /// Copies the elements in the range, defined by [first, last), to an + /// Moves the elements in the range, defined by [first, last), to an /// uninitialized memory area beginning at \a dest. If an exception is - /// thrown during the copy operation, the function has no effects. + /// thrown during the initialization, some objects in [first, last) are + /// left in a valid but unspecified state. /// /// \note Complexity: Performs exactly \a last - \a first assignments. /// @@ -174,24 +178,24 @@ namespace hpx { namespace ranges { /// \param rng2 Refers to the range to which the elements /// will be copied to /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a sequenced_policy /// execute in sequential order in the calling thread. /// - /// The assignments in the parallel \a uninitialized_copy algorithm invoked + /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a parallel_policy or /// \a parallel_task_policy are permitted to execute in an /// unordered fashion in unspecified threads, and indeterminately sequenced /// within each thread. /// - /// \returns The \a uninitialized_copy algorithm returns a + /// \returns The \a uninitialized_move algorithm returns a /// \a hpx::future>, if the /// execution policy is of type \a sequenced_task_policy /// or \a parallel_task_policy and /// returns \a in_out_result< /// typename hpx::traits::range_traits::iterator_type /// , typename hpx::traits::range_traits::iterator_type> - /// otherwise. 
The \a uninitialized_copy algorithm returns the + /// otherwise. The \a uninitialized_move algorithm returns the /// input iterator to one past the last element copied from and /// the output iterator to the element in the destination range, /// one past the last element copied. @@ -201,14 +205,16 @@ namespace hpx { namespace ranges { hpx::parallel::util::in_out_result< typename hpx::traits::range_traits::iterator_type, typename hpx::traits::range_traits::iterator_type>>::type - uninitialized_copy(ExPolicy&& policy, Rng1&& rng1, Rng2&& rng2); + uninitialized_move(ExPolicy&& policy, Rng1&& rng1, Rng2&& rng2); - /// Copies the elements in the range [first, first + count), starting from + /// Moves the elements in the range [first, first + count), starting from /// first and proceeding to first + count - 1., to another range beginning - /// at dest. If an exception is thrown during the copy operation, the - /// function has no effects. + /// at dest. If an exception is + /// thrown during the initialization, some objects in [first, first + count) + /// are left in a valid but unspecified state. /// - /// \note Complexity: Performs exactly \a last - \a first assignments. + /// \note Complexity: Performs exactly \a count movements, if + /// count > 0, no move operations otherwise. /// /// \tparam InIter The type of the source iterators used (deduced). /// This iterator type must meet the requirements of an @@ -230,27 +236,29 @@ namespace hpx { namespace ranges { /// \param last2 Refers to sentinel value denoting the end of the /// second range the algorithm will be applied to. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a sequenced_policy execute in sequential order in the /// calling thread. 
/// - /// \returns The \a uninitialized_copy_n algorithm returns + /// \returns The \a uninitialized_move_n algorithm returns /// \a in_out_result. - /// The \a uninitialized_copy_n algorithm returns the output + /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past /// the last element copied. /// template - hpx::parallel::util::in_out_result uninitialized_copy_n( + hpx::parallel::util::in_out_result uninitialized_move_n( InIter first1, Size count, FwdIter first2, Sent2 last2); - /// Copies the elements in the range [first, first + count), starting from + /// Moves the elements in the range [first, first + count), starting from /// first and proceeding to first + count - 1., to another range beginning - /// at dest. If an exception is thrown during the copy operation, the - /// function has no effects. + /// at dest. If an exception is + /// thrown during the initialization, some objects in [first, first + count) + /// are left in a valid but unspecified state. /// - /// \note Complexity: Performs exactly \a last - \a first assignments. + /// \note Complexity: Performs exactly \a count movements, if + /// count > 0, no move operations otherwise. /// /// \tparam ExPolicy The type of the execution policy to use (deduced). /// It describes the manner in which the execution @@ -278,24 +286,24 @@ namespace hpx { namespace ranges { /// \param last1 Refers to sentinel value denoting the end of the /// second range the algorithm will be applied to. /// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a sequenced_policy execute in sequential order in the /// calling thread. 
/// - /// The assignments in the parallel \a uninitialized_copy_n algorithm + /// The assignments in the parallel \a uninitialized_move_n algorithm /// invoked with an execution policy object of type /// \a parallel_policy or /// \a parallel_task_policy are permitted to execute in an /// unordered fashion in unspecified threads, and indeterminately sequenced /// within each thread. /// - /// \returns The \a uninitialized_copy_n algorithm returns a + /// \returns The \a uninitialized_move_n algorithm returns a /// \a hpx::future> if the /// execution policy is of type \a sequenced_task_policy or /// \a parallel_task_policy and /// returns \a FwdIter2 otherwise. - /// The \a uninitialized_copy_n algorithm returns the output + /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past /// the last element copied. /// @@ -303,7 +311,7 @@ namespace hpx { namespace ranges { typename FwdIter2, typename Sent2> typename parallel::util::detail::algorithm_result>::type - uninitialized_copy_n(ExPolicy&& policy, FwdIter1 first1, Size count, + uninitialized_move_n(ExPolicy&& policy, FwdIter1 first1, Size count, FwdIter2 first2, Sent2 last2); }} // namespace hpx::ranges #else diff --git a/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp b/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp index c6e0c50b9925..69e9048087d1 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/container_memory.hpp @@ -8,5 +8,5 @@ #include -#include #include +#include From 73bf162621a8e4b3dcac2ef01669652a31555def Mon Sep 17 00:00:00 2001 From: targetakhil Date: Tue, 15 Jun 2021 10:45:38 +0530 Subject: [PATCH 10/58] add test for overload not taking in an expolicy --- .../algorithms/uninitialized_move.hpp | 39 +++---------------- .../uninitialized_move.hpp | 36 ++++++++--------- 
.../unit/algorithms/uninitialized_move.cpp | 1 + .../algorithms/uninitialized_move_tests.hpp | 21 ++++++++++ .../unit/algorithms/uninitialized_moven.cpp | 23 +++++++++++ 5 files changed, 68 insertions(+), 52 deletions(-) diff --git a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp index 81a308092410..8f30ae965136 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/algorithms/uninitialized_move.hpp @@ -128,7 +128,7 @@ namespace hpx { /// returns \a FwdIter2. /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template FwdIter uninitialized_move_n(InIter first, Size count, FwdIter dest); @@ -183,7 +183,7 @@ namespace hpx { /// returns \a FwdIter2 otherwise. /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template @@ -253,35 +253,6 @@ namespace hpx { namespace parallel { inline namespace v1 { } } - // provide our own implementation of std::uninitialized_move as some - // versions of MSVC horribly fail at compiling it for some types T - template - util::in_out_result std_uninitialized_move( - InIter1 first, Sent last, InIter2 d_first) - { - using value_type = - typename std::iterator_traits::value_type; - - InIter2 current = d_first; - try - { - for (/* */; first != last; ++first, (void) ++current) - { - ::new (std::addressof(*current)) - value_type(std::move(*first)); - } - return util::in_out_result{first, current}; - } - catch (...) 
- { - for (/* */; d_first != current; ++d_first) - { - (*d_first).~value_type(); - } - throw; - } - } - /////////////////////////////////////////////////////////////////////// template util::in_out_result sequential_uninitialized_move_n( @@ -371,8 +342,8 @@ namespace hpx { namespace parallel { inline namespace v1 { static util::in_out_result sequential( ExPolicy, InIter1 first, Sent last, FwdIter2 dest) { - return sequential_uninitialized_move(first, dest, - [last](InIter1 first, FwdIter2 current) -> bool { + return sequential_uninitialized_move( + first, dest, [last](InIter1 first, FwdIter2) -> bool { return first != last; }); } @@ -692,7 +663,7 @@ namespace hpx { // if count is representing a negative value, we do nothing if (hpx::parallel::v1::detail::is_negative(count)) { - return std::make_pair(first, dest); + return std::pair(first, dest); } return parallel::util::get_pair( diff --git a/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp index ec79adb3b42c..ec0b8e50b2f2 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/container_algorithms/uninitialized_move.hpp @@ -33,7 +33,7 @@ namespace hpx { namespace ranges { /// sentinel type must be a sentinel for InIter2. /// /// \param first1 Refers to the beginning of the sequence of elements - /// that will be copied from + /// that will be moved from /// \param last1 Refers to sentinel value denoting the end of the /// sequence of elements the algorithm will be applied /// \param first2 Refers to the beginning of the destination range. @@ -47,9 +47,9 @@ namespace hpx { namespace ranges { /// \returns The \a uninitialized_move algorithm returns an /// \a in_out_result. 
/// The \a uninitialized_move algorithm returns an input iterator - /// to one past the last element copied from and the output + /// to one past the last element moved from and the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template hpx::parallel::util::in_out_result uninitialized_move( @@ -81,7 +81,7 @@ namespace hpx { namespace ranges { /// \param policy The execution policy to use for the scheduling of /// the iterations. /// \param first1 Refers to the beginning of the sequence of elements - /// that will be copied from + /// that will be moved from /// \param last1 Refers to sentinel value denoting the end of the /// sequence of elements the algorithm will be applied. /// \param first2 Refers to the beginning of the destination range. @@ -104,9 +104,9 @@ namespace hpx { namespace ranges { /// or \a parallel_task_policy and /// returns \a in_out_result otherwise. /// The \a uninitialized_move algorithm returns an input iterator - /// to one past the last element copied from and the output + /// to one past the last element moved from and the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template @@ -130,9 +130,9 @@ namespace hpx { namespace ranges { /// meet the requirements of an forward iterator. /// /// \param rng1 Refers to the range from which the elements - /// will be copied from + /// will be moved from /// \param rng2 Refers to the range to which the elements - /// will be copied to + /// will be moved to /// /// The assignments in the parallel \a uninitialized_move algorithm invoked /// without an execution policy object will execute in sequential order in @@ -143,9 +143,9 @@ namespace hpx { namespace ranges { /// ::iterator_type, typename hpx::traits::range_traits /// ::iterator_type>. 
/// The \a uninitialized_move algorithm returns an input iterator - /// to one past the last element copied from and the output + /// to one past the last element moved from and the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template hpx::parallel::util::in_out_result< @@ -174,9 +174,9 @@ namespace hpx { namespace ranges { /// \param policy The execution policy to use for the scheduling of /// the iterations. /// \param rng1 Refers to the range from which the elements - /// will be copied from + /// will be moved from /// \param rng2 Refers to the range to which the elements - /// will be copied to + /// will be moved to /// /// The assignments in the parallel \a uninitialized_move algorithm invoked /// with an execution policy object of type \a sequenced_policy @@ -196,9 +196,9 @@ namespace hpx { namespace ranges { /// typename hpx::traits::range_traits::iterator_type /// , typename hpx::traits::range_traits::iterator_type> /// otherwise. The \a uninitialized_move algorithm returns the - /// input iterator to one past the last element copied from and + /// input iterator to one past the last element moved from and /// the output iterator to the element in the destination range, - /// one past the last element copied. + /// one past the last element moved. /// template typename parallel::util::detail::algorithm_result. /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template hpx::parallel::util::in_out_result uninitialized_move_n( @@ -279,7 +279,7 @@ namespace hpx { namespace ranges { /// \param policy The execution policy to use for the scheduling of /// the iterations. 
/// \param first1 Refers to the beginning of the sequence of elements - /// that will be copied from + /// that will be moved from /// \param count Refers to the number of elements starting at /// \a first the algorithm will be applied to. /// \param first2 Refers to the beginning of the destination range. @@ -305,7 +305,7 @@ namespace hpx { namespace ranges { /// returns \a FwdIter2 otherwise. /// The \a uninitialized_move_n algorithm returns the output /// iterator to the element in the destination range, one past - /// the last element copied. + /// the last element moved. /// template diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move.cpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move.cpp index 44b372d4849d..e308f66e243c 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move.cpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move.cpp @@ -17,6 +17,7 @@ template void test_uninitialized_move() { using namespace hpx::execution; + test_uninitialized_move(IteratorTag()); test_uninitialized_move(seq, IteratorTag()); test_uninitialized_move(par, IteratorTag()); test_uninitialized_move(par_unseq, IteratorTag()); diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp index 7eaf180f1479..e1a3c5352f94 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp @@ -21,6 +21,27 @@ #include "test_utils.hpp" //////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move(IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + 
hpx::uninitialized_move(iterator(std::begin(c)), iterator(std::end(c)), std::begin(d)); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, d.size()); +} + template void test_uninitialized_move(ExPolicy&& policy, IteratorTag) { diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp index d25f84398a66..3449fa827245 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_moven.cpp @@ -19,6 +19,28 @@ #include "test_utils.hpp" //////////////////////////////////////////////////////////////////////////// +template +void test_uninitialized_move_n(IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + + hpx::uninitialized_move_n(iterator(std::begin(c)), c.size(), std::begin(d)); + + std::size_t count = 0; + HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), + [&count](std::size_t v1, std::size_t v2) -> bool { + HPX_TEST_EQ(v1, v2); + ++count; + return v1 == v2; + })); + HPX_TEST_EQ(count, d.size()); +} + template void test_uninitialized_move_n(ExPolicy policy, IteratorTag) { @@ -72,6 +94,7 @@ void test_uninitialized_move_n_async(ExPolicy p, IteratorTag) template void test_uninitialized_move_n() { + test_uninitialized_move_n(IteratorTag()); test_uninitialized_move_n(hpx::execution::seq, IteratorTag()); test_uninitialized_move_n(hpx::execution::par, IteratorTag()); test_uninitialized_move_n(hpx::execution::par_unseq, IteratorTag()); From 0fa12911c8da4bdb50945538b48e9075310abaa8 Mon Sep 17 00:00:00 2001 From: targetakhil Date: Tue, 15 
Jun 2021 13:40:28 +0530 Subject: [PATCH 11/58] clang format fix and update documentation with range overloads --- docs/sphinx/api/public_api.rst | 3 +++ .../algorithms/include/hpx/parallel/util/result_types.hpp | 3 ++- .../tests/unit/algorithms/uninitialized_move_tests.hpp | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/api/public_api.rst b/docs/sphinx/api/public_api.rst index f554ee7f7c6f..75b6fecbd002 100644 --- a/docs/sphinx/api/public_api.rst +++ b/docs/sphinx/api/public_api.rst @@ -576,6 +576,9 @@ Functions - :cpp:func:`hpx::parallel::v1::uninitialized_value_construct` - :cpp:func:`hpx::parallel::v1::uninitialized_value_construct_n` +- :cpp:func:`hpx::ranges::uninitialized_move` +- :cpp:func:`hpx::ranges::uninitialized_move_n` + Header ``hpx/numeric.hpp`` ========================== diff --git a/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp b/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp index 6e2af5078553..e58056bc5236 100644 --- a/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp +++ b/libs/parallelism/algorithms/include/hpx/parallel/util/result_types.hpp @@ -103,7 +103,8 @@ namespace hpx { namespace parallel { namespace util { hpx::future> get_pair( hpx::future>&& f) { - return hpx::make_future>(std::move(f), [](util::in_out_result&& p) { + return hpx::make_future>( + std::move(f), [](util::in_out_result&& p) { return std::pair{p.in, p.out}; }); } diff --git a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp index e1a3c5352f94..aa94c2ed7b08 100644 --- a/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp +++ b/libs/parallelism/algorithms/tests/unit/algorithms/uninitialized_move_tests.hpp @@ -30,7 +30,8 @@ void test_uninitialized_move(IteratorTag) std::vector c(10007); std::vector d(c.size()); std::iota(std::begin(c), 
std::end(c), std::rand()); - hpx::uninitialized_move(iterator(std::begin(c)), iterator(std::end(c)), std::begin(d)); + hpx::uninitialized_move( + iterator(std::begin(c)), iterator(std::end(c)), std::begin(d)); std::size_t count = 0; HPX_TEST(std::equal(std::begin(c), std::end(c), std::begin(d), From dab45d9257225ab2aef0527b62b0ea19df99b71a Mon Sep 17 00:00:00 2001 From: Nikunj Gupta Date: Tue, 29 Jun 2021 02:42:11 +0530 Subject: [PATCH 12/58] Finish touch-ups on cuda timestamp --- libs/core/hardware/include/hpx/hardware/timestamp.hpp | 1 + libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp | 8 ++++---- .../include/hpx/hardware/timestamp/linux_generic.hpp | 8 ++++---- .../include/hpx/hardware/timestamp/linux_x86_32.hpp | 7 ++++--- .../include/hpx/hardware/timestamp/linux_x86_64.hpp | 7 ++++--- .../core/hardware/include/hpx/hardware/timestamp/msvc.hpp | 5 +++-- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/libs/core/hardware/include/hpx/hardware/timestamp.hpp b/libs/core/hardware/include/hpx/hardware/timestamp.hpp index c2a36546ff1f..bb2864cd124c 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp.hpp @@ -10,6 +10,7 @@ #include +// clang-format off #if defined(HPX_MSVC) #include #elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \ diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp index 5df1d79e9f73..0a413d114a1a 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/bgq.hpp @@ -15,11 +15,11 @@ #include -#include - #include -#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) +#include + +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_CODE) #include #endif @@ -27,7 +27,7 @@ namespace hpx { namespace util { namespace hardware { HPX_HOST_DEVICE inline std::uint64_t timestamp() { -#if 
defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_DEVICE_CODE) return timestamp_cuda(); #else return GetTimeBase(); diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp index bc00bd2c3dfe..a55dd95af01d 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_generic.hpp @@ -8,13 +8,13 @@ #pragma once -#include +#include #include -#include +#include -#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_CODE) #include #endif @@ -22,7 +22,7 @@ namespace hpx { namespace util { namespace hardware { HPX_HOST_DEVICE inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_DEVICE_CODE) return timestamp_cuda(); #else struct timespec res; diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp index 0989c5fa4d94..6c4416f86bcb 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_32.hpp @@ -8,10 +8,11 @@ #pragma once -#include #include -#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) +#include + +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_CODE) #include #endif @@ -20,7 +21,7 @@ namespace hpx { namespace util { namespace hardware { // clang-format off HPX_HOST_DEVICE inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_DEVICE_CODE) return timestamp_cuda(); #else std::uint64_t r = 0; diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp index 
513b85de422b..b13efbb80049 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/linux_x86_64.hpp @@ -8,10 +8,11 @@ #pragma once -#include #include -#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) +#include + +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_CODE) #include #endif @@ -20,7 +21,7 @@ namespace hpx { namespace util { namespace hardware { // clang-format off HPX_HOST_DEVICE inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_DEVICE_CODE) return timestamp_cuda(); #else std::uint32_t lo = 0, hi = 0; diff --git a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp index 17c96e7d61e4..3578e5224e9c 100644 --- a/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp +++ b/libs/core/hardware/include/hpx/hardware/timestamp/msvc.hpp @@ -9,6 +9,7 @@ #pragma once #include + #if defined(HPX_WINDOWS) #include @@ -16,14 +17,14 @@ #include #include -#if defined(HPX_HAVE_CUDA) && defined(__CUDACC__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_CODE) #include #endif namespace hpx { namespace util { namespace hardware { HPX_HOST_DEVICE inline std::uint64_t timestamp() { -#if defined(HPX_HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(HPX_HAVE_CUDA) && defined(HPX_COMPUTE_DEVICE_CODE) return timestamp_cuda(); #else LARGE_INTEGER now; From 4afefa4d721307fafaf2b28d5c0ae4749a0c1d31 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Sat, 26 Jun 2021 17:03:16 -0500 Subject: [PATCH 13/58] Improve integration of futures and senders --- libs/core/allocator_support/CMakeLists.txt | 1 + .../allocator_support/traits/is_allocator.hpp | 87 ++++++ .../hpx/execution_base/operation_state.hpp | 3 +- .../include/hpx/execution_base/receiver.hpp | 7 +- .../include/hpx/execution_base/sender.hpp | 11 +- libs/parallelism/execution/CMakeLists.txt 
| 4 +- .../algorithms/detail/partial_algorithm.hpp | 1 + .../hpx/execution/algorithms/keep_future.hpp | 55 ++-- .../hpx/execution/algorithms/make_future.hpp | 183 ++++++++++++ .../hpx/execution/algorithms/sync_wait.hpp | 2 +- .../include/hpx/execution/sender_future.hpp | 139 --------- .../executors/tests/unit/p0443_executor.cpp | 41 ++- .../futures/include/hpx/futures/future.hpp | 272 +++++++++--------- .../include/hpx/futures/traits/is_future.hpp | 5 +- 14 files changed, 487 insertions(+), 324 deletions(-) create mode 100644 libs/core/allocator_support/include/hpx/allocator_support/traits/is_allocator.hpp create mode 100644 libs/parallelism/execution/include/hpx/execution/algorithms/make_future.hpp delete mode 100644 libs/parallelism/execution/include/hpx/execution/sender_future.hpp diff --git a/libs/core/allocator_support/CMakeLists.txt b/libs/core/allocator_support/CMakeLists.txt index 408d90f04b55..c3f29e04f92c 100644 --- a/libs/core/allocator_support/CMakeLists.txt +++ b/libs/core/allocator_support/CMakeLists.txt @@ -8,6 +8,7 @@ set(allocator_support_headers hpx/allocator_support/aligned_allocator.hpp hpx/allocator_support/allocator_deleter.hpp hpx/allocator_support/internal_allocator.hpp + hpx/allocator_support/traits/is_allocator.hpp ) # cmake-format: off diff --git a/libs/core/allocator_support/include/hpx/allocator_support/traits/is_allocator.hpp b/libs/core/allocator_support/include/hpx/allocator_support/traits/is_allocator.hpp new file mode 100644 index 000000000000..8056a41e5da0 --- /dev/null +++ b/libs/core/allocator_support/include/hpx/allocator_support/traits/is_allocator.hpp @@ -0,0 +1,87 @@ +// Copyright (c) 2021 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// this trait has been inspired by: +// https://stackoverflow.com/questions/21379484/looking-for-an-is-allocator-type-trait-for-use-in-enable-if + +#pragma once + +#include +#include + +#include + +namespace hpx { namespace traits { + + /////////////////////////////////////////////////////////////////////////// + namespace detail { + + template + struct has_allocate + { + private: + template + static std::false_type test(...); + + template + static std::true_type test(decltype(std::declval().allocate(0))); + + public: + static constexpr bool value = decltype(test(0))::value; + }; + + template + struct has_value_type + { + private: + template + static std::false_type test(...); + + template + static std::true_type test(typename U::value_type*); + + public: + static constexpr bool value = decltype(test(nullptr))::value; + }; + + template ::value> + struct has_deallocate + { + private: + using pointer = decltype(std::declval().allocate(0)); + + template + static auto test(Alloc&& a, Pointer&& p) + -> decltype(a.deallocate(p, 0), std::true_type()); + + template + static auto test(Alloc const& a, Pointer&& p) -> std::false_type; + + public: + static constexpr bool value = decltype(test( + std::declval(), std::declval()))::value; + }; + + template + struct has_deallocate + { + static constexpr bool value = false; + }; + } // namespace detail + + /////////////////////////////////////////////////////////////////////////// + template + struct is_allocator + : std::integral_constant::value && + detail::has_allocate::value && + detail::has_deallocate::value> + { + }; + + template + HPX_INLINE_CONSTEXPR_VARIABLE bool is_allocator_v = is_allocator::value; +}} // namespace hpx::traits diff --git a/libs/core/execution_base/include/hpx/execution_base/operation_state.hpp b/libs/core/execution_base/include/hpx/execution_base/operation_state.hpp index 560371cb8a12..22ac00c005f2 100644 --- 
a/libs/core/execution_base/include/hpx/execution_base/operation_state.hpp +++ b/libs/core/execution_base/include/hpx/execution_base/operation_state.hpp @@ -84,5 +84,6 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_operation_state_v = is_operation_state::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_operation_state_v = + is_operation_state::value; }}} // namespace hpx::execution::experimental diff --git a/libs/core/execution_base/include/hpx/execution_base/receiver.hpp b/libs/core/execution_base/include/hpx/execution_base/receiver.hpp index c7130aa07918..3860d1f8cec8 100644 --- a/libs/core/execution_base/include/hpx/execution_base/receiver.hpp +++ b/libs/core/execution_base/include/hpx/execution_base/receiver.hpp @@ -172,7 +172,7 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_receiver_v = is_receiver::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_receiver_v = is_receiver::value; /////////////////////////////////////////////////////////////////////// namespace detail { @@ -199,7 +199,8 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_receiver_of_v = is_receiver_of::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_receiver_of_v = + is_receiver_of::value; /////////////////////////////////////////////////////////////////////// namespace detail { @@ -227,6 +228,6 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_nothrow_receiver_of_v = + HPX_INLINE_CONSTEXPR_VARIABLE bool is_nothrow_receiver_of_v = is_nothrow_receiver_of::value; }}} // namespace hpx::execution::experimental diff --git a/libs/core/execution_base/include/hpx/execution_base/sender.hpp b/libs/core/execution_base/include/hpx/execution_base/sender.hpp index c8dd926161ae..6ea95cc5dc76 100644 --- a/libs/core/execution_base/include/hpx/execution_base/sender.hpp +++ 
b/libs/core/execution_base/include/hpx/execution_base/sender.hpp @@ -302,7 +302,7 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_sender_v = is_sender::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_sender_v = is_sender::value; struct invocable_archetype { @@ -649,10 +649,12 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_executor_v = is_executor::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_executor_v = + is_executor::value; template - constexpr bool is_executor_of_v = is_executor_of::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_executor_of_v = + is_executor_of::value; namespace detail { template @@ -941,7 +943,8 @@ namespace hpx { namespace execution { namespace experimental { }; template - constexpr bool is_scheduler_v = is_scheduler::value; + HPX_INLINE_CONSTEXPR_VARIABLE bool is_scheduler_v = + is_scheduler::value; template using connect_result_t = diff --git a/libs/parallelism/execution/CMakeLists.txt b/libs/parallelism/execution/CMakeLists.txt index e30e4f6578c2..1f0c2bd90f5f 100644 --- a/libs/parallelism/execution/CMakeLists.txt +++ b/libs/parallelism/execution/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019 The STE||AR-Group +# Copyright (c) 2019-2021 The STE||AR-Group # # SPDX-License-Identifier: BSL-1.0 # Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -18,6 +18,7 @@ set(execution_headers hpx/execution/algorithms/keep_future.hpp hpx/execution/algorithms/let_error.hpp hpx/execution/algorithms/let_value.hpp + hpx/execution/algorithms/make_future.hpp hpx/execution/algorithms/on.hpp hpx/execution/algorithms/sync_wait.hpp hpx/execution/algorithms/transform.hpp @@ -41,7 +42,6 @@ set(execution_headers hpx/execution/executors/polymorphic_executor.hpp hpx/execution/executors/rebind_executor.hpp hpx/execution/executors/static_chunk_size.hpp - hpx/execution/sender_future.hpp hpx/execution/traits/detail/simd/vector_pack_alignment_size.hpp hpx/execution/traits/detail/simd/vector_pack_count_bits.hpp hpx/execution/traits/detail/simd/vector_pack_load_store.hpp diff --git a/libs/parallelism/execution/include/hpx/execution/algorithms/detail/partial_algorithm.hpp b/libs/parallelism/execution/include/hpx/execution/algorithms/detail/partial_algorithm.hpp index 5ddc3c6aee47..c0fb06d9cab8 100644 --- a/libs/parallelism/execution/include/hpx/execution/algorithms/detail/partial_algorithm.hpp +++ b/libs/parallelism/execution/include/hpx/execution/algorithms/detail/partial_algorithm.hpp @@ -31,6 +31,7 @@ namespace hpx { : ts(std::piecewise_construct, std::forward(ts)...) 
{ } + partial_algorithm_base(partial_algorithm_base&&) = default; partial_algorithm_base& operator=(partial_algorithm_base&&) = default; partial_algorithm_base(partial_algorithm_base const&) = delete; diff --git a/libs/parallelism/execution/include/hpx/execution/algorithms/keep_future.hpp b/libs/parallelism/execution/include/hpx/execution/algorithms/keep_future.hpp index 0b5bc1abef8a..edd0a7902328 100644 --- a/libs/parallelism/execution/include/hpx/execution/algorithms/keep_future.hpp +++ b/libs/parallelism/execution/include/hpx/execution/algorithms/keep_future.hpp @@ -7,6 +7,8 @@ #pragma once #include +#include +#include #include #include #include @@ -30,31 +32,33 @@ namespace hpx { namespace execution { namespace experimental { void start() & noexcept { - try - { - auto state = hpx::traits::detail::get_shared_state(future); - - if (!state) - { - HPX_THROW_EXCEPTION(no_state, "operation_state::start", - "the future has no valid shared state asdasd"); - } - - // The operation state has to be kept alive until set_value - // is called, which means that we don't need to move - // receiver and future into the on_completed callback. - state->set_on_completed([this]() mutable { - hpx::execution::experimental::set_value( - std::move(receiver), std::move(future)); + hpx::detail::try_catch_exception_ptr( + [&]() { + auto state = + hpx::traits::detail::get_shared_state(future); + + if (!state) + { + HPX_THROW_EXCEPTION(no_state, + "operation_state::start", + "the future has no valid shared state"); + } + + // The operation state has to be kept alive until set_value + // is called, which means that we don't need to move + // receiver and future into the on_completed callback. + state->set_on_completed([this]() mutable { + hpx::execution::experimental::set_value( + std::move(receiver), std::move(future)); + }); + }, + [&](std::exception_ptr ep) { + hpx::execution::experimental::set_error( + std::move(receiver), std::move(ep)); }); - } - catch (...) 
- { - hpx::execution::experimental::set_error( - std::move(receiver), std::current_exception()); - } } }; + template struct keep_future_sender_base { @@ -143,9 +147,12 @@ namespace hpx { namespace execution { namespace experimental { : hpx::functional::tag_fallback { private: + // clang-format off template >::value>> + HPX_CONCEPT_REQUIRES_( + hpx::traits::is_future_v> + )> + // clang-format on friend constexpr HPX_FORCEINLINE auto tag_fallback_dispatch( keep_future_t, Future&& future) { diff --git a/libs/parallelism/execution/include/hpx/execution/algorithms/make_future.hpp b/libs/parallelism/execution/include/hpx/execution/algorithms/make_future.hpp new file mode 100644 index 000000000000..126cd44e5ceb --- /dev/null +++ b/libs/parallelism/execution/include/hpx/execution/algorithms/make_future.hpp @@ -0,0 +1,183 @@ +// Copyright (c) 2021 ETH Zurich +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace hpx { namespace execution { namespace experimental { + namespace detail { + template + struct make_future_receiver + { + hpx::intrusive_ptr< + hpx::lcos::detail::future_data_allocator> + data; + + void set_error(std::exception_ptr ep) && noexcept + { + data->set_exception(std::move(ep)); + data.reset(); + } + + void set_done() && noexcept + { + std::terminate(); + } + + template + void set_value(U&& u) && noexcept + { + hpx::detail::try_catch_exception_ptr( + [&]() { data->set_value(std::forward(u)); }, + [&](std::exception_ptr ep) { + data->set_exception(std::move(ep)); + }); + data.reset(); + } + }; + + template + struct make_future_receiver + { + hpx::intrusive_ptr< + hpx::lcos::detail::future_data_allocator> + data; + + void 
set_error(std::exception_ptr ep) && noexcept + { + data->set_exception(std::move(ep)); + data.reset(); + } + + void set_done() && noexcept + { + std::terminate(); + } + + void set_value() && noexcept + { + hpx::detail::try_catch_exception_ptr( + [&]() { data->set_value(hpx::util::unused); }, + [&](std::exception_ptr ep) { + data->set_exception(std::move(ep)); + }); + data.reset(); + } + }; + + template + struct future_data + : hpx::lcos::detail::future_data_allocator + { + HPX_NON_COPYABLE(future_data); + + using operation_state_type = std::decay_t; + using init_no_addref = + typename hpx::lcos::detail::future_data_allocator::init_no_addref; + using other_allocator = typename std::allocator_traits< + Allocator>::template rebind_alloc; + + operation_state_type os; + + template + future_data( + init_no_addref no_addref, other_allocator const& alloc, S&& s) + : hpx::lcos::detail::future_data_allocator( + no_addref, alloc) + , os(hpx::execution::experimental::connect(std::forward(s), + detail::make_future_receiver{this})) + { + hpx::execution::experimental::start(os); + } + }; + + /////////////////////////////////////////////////////////////////////// + template + auto make_future(S&& s, Allocator const& a) + { + using allocator_type = Allocator; + + using value_types = + typename hpx::execution::experimental::sender_traits< + std::decay_t>::template value_types; + using result_type = + std::decay_t>; + using operation_state_type = hpx::util::invoke_result_t< + hpx::execution::experimental::connect_t, S, + detail::make_future_receiver>; + + using shared_state = detail::future_data; + using init_no_addref = typename shared_state::init_no_addref; + using other_allocator = typename std::allocator_traits< + allocator_type>::template rebind_alloc; + using allocator_traits = std::allocator_traits; + using unique_ptr = std::unique_ptr>; + + other_allocator alloc(a); + unique_ptr p(allocator_traits::allocate(alloc, 1), + hpx::util::allocator_deleter{alloc}); + + 
allocator_traits::construct( + alloc, p.get(), init_no_addref{}, alloc, std::forward(s)); + + return hpx::traits::future_access>::create( + p.release(), false); + } + } // namespace detail + + /////////////////////////////////////////////////////////////////////////// + HPX_INLINE_CONSTEXPR_VARIABLE struct make_future_t final + : hpx::functional::tag_fallback + { + private: + // clang-format off + template , + HPX_CONCEPT_REQUIRES_( + is_sender_v && + hpx::traits::is_allocator_v + )> + // clang-format on + friend constexpr HPX_FORCEINLINE auto tag_fallback_dispatch( + make_future_t, S&& s, Allocator const& a = Allocator{}) + { + return detail::make_future(std::forward(s), a); + } + + // clang-format off + template , + HPX_CONCEPT_REQUIRES_( + hpx::traits::is_allocator_v + )> + // clang-format on + friend constexpr HPX_FORCEINLINE auto tag_fallback_dispatch( + make_future_t, Allocator const& a = Allocator{}) + { + return detail::partial_algorithm{a}; + } + } make_future{}; +}}} // namespace hpx::execution::experimental diff --git a/libs/parallelism/execution/include/hpx/execution/algorithms/sync_wait.hpp b/libs/parallelism/execution/include/hpx/execution/algorithms/sync_wait.hpp index 7cad3b67cb8e..9a11bb102810 100644 --- a/libs/parallelism/execution/include/hpx/execution/algorithms/sync_wait.hpp +++ b/libs/parallelism/execution/include/hpx/execution/algorithms/sync_wait.hpp @@ -149,7 +149,7 @@ namespace hpx { namespace execution { namespace experimental { void set_done() && noexcept { signal_set_called(); - }; + } void set_value() && noexcept { diff --git a/libs/parallelism/execution/include/hpx/execution/sender_future.hpp b/libs/parallelism/execution/include/hpx/execution/sender_future.hpp deleted file mode 100644 index d78e7c6ed059..000000000000 --- a/libs/parallelism/execution/include/hpx/execution/sender_future.hpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2021 ETH Zurich -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software 
License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace hpx { namespace execution { namespace experimental { - namespace detail { - template - struct make_future_receiver - { - hpx::intrusive_ptr< - hpx::lcos::detail::future_data_allocator> - data; - - void set_error(std::exception_ptr e) noexcept - { - data->set_exception(e); - data.reset(); - } - - void set_done() noexcept - { - std::terminate(); - } - - template - void set_value(U&& u) noexcept - { - data->set_value(std::forward(u)); - data.reset(); - } - }; - - template - struct make_future_receiver - { - hpx::intrusive_ptr< - hpx::lcos::detail::future_data_allocator> - data; - - void set_error(std::exception_ptr e) noexcept - { - data->set_exception(e); - data.reset(); - } - - void set_done() noexcept - { - std::terminate(); - } - - void set_value() noexcept - { - data->set_value(hpx::util::unused); - data.reset(); - } - }; - - template - struct future_data - : hpx::lcos::detail::future_data_allocator - { - HPX_NON_COPYABLE(future_data); - - using operation_state_type = std::decay_t; - using init_no_addref = - typename hpx::lcos::detail::future_data_allocator::init_no_addref; - using other_allocator = typename std::allocator_traits< - Allocator>::template rebind_alloc; - - operation_state_type os; - - template - future_data( - init_no_addref no_addref, other_allocator const& alloc, S&& s) - : hpx::lcos::detail::future_data_allocator( - no_addref, alloc) - , os(hpx::execution::experimental::connect(std::forward(s), - detail::make_future_receiver{this})) - { - hpx::execution::experimental::start(os); - } - }; - } // namespace detail - - template , - typename = std::enable_if_t>> - auto make_future(S&& s, Allocator const& a = Allocator{}) - { - using allocator_type = Allocator; - - 
using value_types = - typename hpx::execution::experimental::sender_traits>::template value_types; - using result_type = std::decay_t>; - using operation_state_type = typename hpx::util::invoke_result< - hpx::execution::experimental::connect_t, S, - detail::make_future_receiver>::type; - - using shared_state = detail::future_data; - using init_no_addref = typename shared_state::init_no_addref; - using other_allocator = typename std::allocator_traits< - allocator_type>::template rebind_alloc; - using allocator_traits = std::allocator_traits; - using unique_ptr = std::unique_ptr>; - - other_allocator alloc(a); - unique_ptr p(allocator_traits::allocate(alloc, 1), - hpx::util::allocator_deleter{alloc}); - - allocator_traits::construct( - alloc, p.get(), init_no_addref{}, alloc, std::forward(s)); - - return hpx::traits::future_access>::create( - p.release(), false); - } -}}} // namespace hpx::execution::experimental diff --git a/libs/parallelism/executors/tests/unit/p0443_executor.cpp b/libs/parallelism/executors/tests/unit/p0443_executor.cpp index e792df73464d..2968d4e31d0d 100644 --- a/libs/parallelism/executors/tests/unit/p0443_executor.cpp +++ b/libs/parallelism/executors/tests/unit/p0443_executor.cpp @@ -66,7 +66,7 @@ struct check_context_receiver void set_done() noexcept { HPX_TEST(false); - }; + } template void set_value(Ts&&...) noexcept @@ -252,7 +252,7 @@ struct callback_receiver void set_done() noexcept { HPX_TEST(false); - }; + } template void set_value(Ts&&...) noexcept @@ -629,6 +629,7 @@ void test_when_all() HPX_TEST_EQ(y, std::string("hello")); }) | ex::sync_wait(); + HPX_TEST(false); } catch (std::runtime_error const& e) { @@ -667,6 +668,7 @@ void test_when_all() HPX_TEST_EQ(y, std::string("hello")); }) | ex::sync_wait(); + HPX_TEST(false); } catch (std::runtime_error const& e) { @@ -698,11 +700,12 @@ void test_future_sender() try { ex::sync_wait(std::move(f)); + HPX_TEST(false); } catch (...) 
{ exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -723,11 +726,12 @@ void test_future_sender() try { ex::sync_wait(std::move(f)); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -763,11 +767,12 @@ void test_future_sender() try { ex::sync_wait(sf); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -790,11 +795,12 @@ void test_future_sender() try { ex::sync_wait(sf); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -811,6 +817,16 @@ void test_future_sender() HPX_TEST_EQ(f.get(), 3); } + { + auto f = ex::just(3) | ex::make_future(); + HPX_TEST_EQ(f.get(), 3); + } + + { + auto f = ex::just_on(ex::executor{}, 3) | ex::make_future(); + HPX_TEST_EQ(f.get(), 3); + } + { std::atomic called{false}; auto s = ex::schedule(ex::executor{}) | @@ -1050,6 +1066,7 @@ void test_let_value() HPX_TEST(false); return ex::just(0); }) | ex::sync_wait(); + HPX_TEST(false); } catch (std::runtime_error const& e) { @@ -1294,11 +1311,12 @@ void test_keep_future_sender() try { ex::sync_wait(std::move(f) | ex::keep_future()); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -1322,11 +1340,12 @@ void test_keep_future_sender() try { ex::sync_wait(std::move(f) | ex::keep_future()); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -1357,11 +1376,12 @@ void test_keep_future_sender() try { ex::sync_wait(sf | ex::keep_future()); + HPX_TEST(false); } catch (...) { exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } @@ -1381,11 +1401,12 @@ void test_keep_future_sender() try { ex::sync_wait(sf | ex::keep_future()); + HPX_TEST(false); } catch (...) 
{ exception_thrown = true; - }; + } HPX_TEST(exception_thrown); } diff --git a/libs/parallelism/futures/include/hpx/futures/future.hpp b/libs/parallelism/futures/include/hpx/futures/future.hpp index 6b7550b2a96d..3df489d8b285 100644 --- a/libs/parallelism/futures/include/hpx/futures/future.hpp +++ b/libs/parallelism/futures/include/hpx/futures/future.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2019 Hartmut Kaiser +// Copyright (c) 2007-2021 Hartmut Kaiser // Copyright (c) 2013 Agustin Berge // // SPDX-License-Identifier: BSL-1.0 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -481,6 +482,109 @@ namespace hpx { namespace lcos { namespace detail { addref); } + /////////////////////////////////////////////////////////////////////////// + // Operation state for sender compatibility + template + class operation_state + { + private: + using receiver_type = std::decay_t; + using future_type = std::decay_t; + using result_type = typename future_type::result_type; + + public: + template + operation_state(Receiver_&& r, future_type f) + : receiver_(std::forward(r)) + , future_(std::move(f)) + { + } + + operation_state(operation_state&&) = delete; + operation_state& operator=(operation_state&&) = delete; + operation_state(operation_state const&) = delete; + operation_state& operator=(operation_state const&) = delete; + + void start() noexcept + { + start_helper(std::is_void{}); + } + + private: + void start_helper(std::true_type) noexcept + { + hpx::detail::try_catch_exception_ptr( + [&]() { + auto state = traits::detail::get_shared_state(future_); + + if (!state) + { + HPX_THROW_EXCEPTION(no_state, "operation_state::start", + "the future has no valid shared state"); + } + + // The operation state has to be kept alive until set_value is + // called, which means that we don't need to move receiver and + // future into the on_completed callback. 
+ state->set_on_completed([this]() mutable { + if (future_.has_value()) + { + hpx::execution::experimental::set_value( + std::move(receiver_)); + } + else if (future_.has_exception()) + { + hpx::execution::experimental::set_error( + std::move(receiver_), + future_.get_exception_ptr()); + } + }); + }, + [&](std::exception_ptr ep) { + hpx::execution::experimental::set_error( + std::move(receiver_), std::move(ep)); + }); + } + + void start_helper(std::false_type) noexcept + { + hpx::detail::try_catch_exception_ptr( + [&]() { + auto state = traits::detail::get_shared_state(future_); + + if (!state) + { + HPX_THROW_EXCEPTION(no_state, "operation_state::start", + "the future has no valid shared state"); + } + + // The operation state has to be kept alive until set_value is + // called, which means that we don't need to move receiver and + // future into the on_completed callback. + state->set_on_completed([this]() mutable { + if (future_.has_value()) + { + hpx::execution::experimental::set_value( + std::move(receiver_), future_.get()); + } + else if (future_.has_exception()) + { + hpx::execution::experimental::set_error( + std::move(receiver_), + future_.get_exception_ptr()); + } + }); + }, + [&](std::exception_ptr ep) { + hpx::execution::experimental::set_error( + std::move(receiver_), std::move(ep)); + }); + } + + std::decay_t receiver_; + future_type future_; + }; + /////////////////////////////////////////////////////////////////////////// template class future_base @@ -490,6 +594,25 @@ namespace hpx { namespace lcos { namespace detail { using shared_state_type = future_data_base< typename traits::detail::shared_state_ptr_result::type>; + // Sender compatibility + template