diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5b856872c90e..a16f9e14a814 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -105,12 +105,9 @@ jobs:
           command: |
               git clone --depth=1 file:///hpx/source-full source
       - run:
-          name: Downloading CTest XML to Junit XML
+          name: Copying CTest XML to JUnit XML conversion stylesheet
           command: |
-              curl \
-                https://raw.githubusercontent.com/Kitware/CDash/master/app/cdash/tests/circle/conv.xsl \
-                --fail \
-                -o /hpx/conv.xsl
+              cp /hpx/source/.circleci/conv.xsl /hpx/conv.xsl
       - persist_to_workspace:
           root: /hpx
           paths:
diff --git a/.circleci/conv.xsl b/.circleci/conv.xsl
new file mode 100644
index 000000000000..e5b22ded6005
--- /dev/null
+++ b/.circleci/conv.xsl
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="Windows-1252" ?>
+<!--  Copyright (c) 2024 Hartmut Kaiser                                            -->
+<!--                                                                               -->
+<!--  SPDX-License-Identifier: BSL-1.0                                             -->
+<!--  Distributed under the Boost Software License, Version 1.0. (See accompanying -->
+<!--  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)        -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+    <xsl:output method="xml" indent="yes"/>
+    <xsl:template match="/Site"> <!-- Entry point: turns the root Site element of the input into a single testsuite element -->
+        <xsl:variable name="Name"><xsl:value-of select="@Name"/></xsl:variable>
+        <xsl:variable name="Hostname"><xsl:value-of select="@Hostname"/></xsl:variable>
+        <xsl:variable name="TestCount"><xsl:value-of select="count(//TestList/Test)"/></xsl:variable> <!-- tests = number of Test entries under any TestList -->
+        <xsl:variable name="ErrorCount"><xsl:value-of select="count(//TestList/Test[@Status='error'])"/></xsl:variable> <!-- NOTE(review): computed but never referenced; the errors attribute below is a fixed "0" -->
+        <xsl:variable name="FailureCount"><xsl:value-of select="count(//Testing/Test[@Status='failed'])"/></xsl:variable> <!-- failures = Testing/Test elements whose Status is 'failed' -->
+        <testsuite name="{$Name}" hostname="{$Hostname}" errors="0" failures="{$FailureCount}" tests="{$TestCount}">
+            <xsl:variable name="BuildName"><xsl:value-of select="@BuildName"/></xsl:variable>
+            <xsl:variable name="BuildStamp"><xsl:value-of select="@BuildStamp"/></xsl:variable>
+            <xsl:variable name="Generator"><xsl:value-of select="@Generator"/></xsl:variable>
+            <xsl:variable name="CompilerName"><xsl:value-of select="@CompilerName"/></xsl:variable>
+            <xsl:variable name="OSName"><xsl:value-of select="@OSName"/></xsl:variable>
+            <xsl:variable name="OSRelease"><xsl:value-of select="@OSRelease"/></xsl:variable>
+            <xsl:variable name="OSVersion"><xsl:value-of select="@OSVersion"/></xsl:variable>
+            <xsl:variable name="OSPlatform"><xsl:value-of select="@OSPlatform"/></xsl:variable>
+            <xsl:variable name="Is64Bits"><xsl:value-of select="@Is64Bits"/></xsl:variable>
+            <xsl:variable name="VendorString"><xsl:value-of select="@VendorString"/></xsl:variable>
+            <xsl:variable name="VendorID"><xsl:value-of select="@VendorID"/></xsl:variable>
+            <xsl:variable name="FamilyID"><xsl:value-of select="@FamilyID"/></xsl:variable>
+            <xsl:variable name="ModelID"><xsl:value-of select="@ModelID"/></xsl:variable>
+            <xsl:variable name="ProcessorCacheSize"><xsl:value-of select="@ProcessorCacheSize"/></xsl:variable>
+            <xsl:variable name="NumberOfLogicalCPU"><xsl:value-of select="@NumberOfLogicalCPU"/></xsl:variable>
+            <xsl:variable name="NumberOfPhysicalCPU"><xsl:value-of select="@NumberOfPhysicalCPU"/></xsl:variable>
+            <xsl:variable name="TotalVirtualMemory"><xsl:value-of select="@TotalVirtualMemory"/></xsl:variable>
+            <xsl:variable name="TotalPhysicalMemory"><xsl:value-of select="@TotalPhysicalMemory"/></xsl:variable>
+            <xsl:variable name="LogicalProcessorsPerPhysical"><xsl:value-of select="@LogicalProcessorsPerPhysical"/></xsl:variable>
+            <xsl:variable name="ProcessorClockFrequency"><xsl:value-of select="@ProcessorClockFrequency"/></xsl:variable> <!-- every Site attribute captured above is echoed twice: as a property element and as a line of system-out -->
+            <properties>
+                <property name="BuildName" value="{$BuildName}"/>
+                <property name="BuildStamp" value="{$BuildStamp}"/>
+                <property name="Name" value="{$Name}"/>
+                <property name="Generator" value="{$Generator}"/>
+                <property name="CompilerName" value="{$CompilerName}"/>
+                <property name="OSName" value="{$OSName}"/>
+                <property name="Hostname" value="{$Hostname}"/>
+                <property name="OSRelease" value="{$OSRelease}"/>
+                <property name="OSVersion" value="{$OSVersion}"/>
+                <property name="OSPlatform" value="{$OSPlatform}"/>
+                <property name="Is64Bits" value="{$Is64Bits}"/>
+                <property name="VendorString" value="{$VendorString}"/>
+                <property name="VendorID" value="{$VendorID}"/>
+                <property name="FamilyID" value="{$FamilyID}"/>
+                <property name="ModelID" value="{$ModelID}"/>
+                <property name="ProcessorCacheSize" value="{$ProcessorCacheSize}"/>
+                <property name="NumberOfLogicalCPU" value="{$NumberOfLogicalCPU}"/>
+                <property name="NumberOfPhysicalCPU" value="{$NumberOfPhysicalCPU}"/>
+                <property name="TotalVirtualMemory" value="{$TotalVirtualMemory}"/>
+                <property name="TotalPhysicalMemory" value="{$TotalPhysicalMemory}"/>
+                <property name="LogicalProcessorsPerPhysical" value="{$LogicalProcessorsPerPhysical}"/>
+                <property name="ProcessorClockFrequency" value="{$ProcessorClockFrequency}"/>
+            </properties>
+            <xsl:apply-templates select="Testing/Test"/> <!-- one testcase element per Testing/Test child, produced by the Testing/Test template -->
+            <system-out>
+                BuildName: <xsl:value-of select="$BuildName"/>
+                BuildStamp: <xsl:value-of select="$BuildStamp"/>
+                Name: <xsl:value-of select="$Name"/>
+                Generator: <xsl:value-of select="$Generator"/>
+                CompilerName: <xsl:value-of select="$CompilerName"/>
+                OSName: <xsl:value-of select="$OSName"/>
+                Hostname: <xsl:value-of select="$Hostname"/>
+                OSRelease: <xsl:value-of select="$OSRelease"/>
+                OSVersion: <xsl:value-of select="$OSVersion"/>
+                OSPlatform: <xsl:value-of select="$OSPlatform"/>
+                Is64Bits: <xsl:value-of select="$Is64Bits"/>
+                VendorString: <xsl:value-of select="$VendorString"/>
+                VendorID: <xsl:value-of select="$VendorID"/>
+                FamilyID: <xsl:value-of select="$FamilyID"/>
+                ModelID: <xsl:value-of select="$ModelID"/>
+                ProcessorCacheSize: <xsl:value-of select="$ProcessorCacheSize"/>
+                NumberOfLogicalCPU: <xsl:value-of select="$NumberOfLogicalCPU"/>
+                NumberOfPhysicalCPU: <xsl:value-of select="$NumberOfPhysicalCPU"/>
+                TotalVirtualMemory: <xsl:value-of select="$TotalVirtualMemory"/>
+                TotalPhysicalMemory: <xsl:value-of select="$TotalPhysicalMemory"/>
+                LogicalProcessorsPerPhysical: <xsl:value-of select="$LogicalProcessorsPerPhysical"/>
+                ProcessorClockFrequency: <xsl:value-of select="$ProcessorClockFrequency"/>
+            </system-out>
+        </testsuite>
+    </xsl:template>
+    <xsl:template match="Testing/Test"> <!-- Converts one Test element under Testing into a testcase element -->
+        <xsl:variable name="testcasename"><xsl:value-of select="Name"/></xsl:variable>
+        <xsl:variable name="testclassname"><xsl:value-of select="substring(Path,2)"/></xsl:variable> <!-- classname is Path with its first character dropped -->
+        <xsl:variable name="exectime">
+            <xsl:for-each select="Results/NamedMeasurement">
+                <xsl:if test="@name = 'Execution Time'">
+                    <xsl:value-of select="."/>
+                </xsl:if>
+            </xsl:for-each>
+        </xsl:variable> <!-- exectime = value of the NamedMeasurement whose name is 'Execution Time'; empty if there is none -->
+        <testcase name="{$testcasename}" classname="{$testclassname}" time="{$exectime}">
+            <xsl:if test="@Status = 'passed'"></xsl:if> <!-- passed: nothing is emitted, leaving an empty testcase -->
+            <xsl:if test="@Status = 'failed'"> <!-- failed: emit failure with message "ExitCode (ExitValue)" and the text of Results/Measurement/Value as body -->
+                <xsl:variable name="failtype">
+                    <xsl:for-each select="Results/NamedMeasurement">
+                        <xsl:if test="@name = 'Exit Code'">
+                            <xsl:value-of select="."/>
+                        </xsl:if>
+                    </xsl:for-each>
+                </xsl:variable>
+                <xsl:variable name="failcode">
+                    <xsl:for-each select="Results/NamedMeasurement">
+                        <xsl:if test="@name = 'Exit Value'">
+                            <xsl:value-of select="."/>
+                        </xsl:if>
+                    </xsl:for-each>
+                </xsl:variable>
+                <failure message="{$failtype} ({$failcode})"><xsl:value-of select="Results/Measurement/Value/text()"/></failure>
+            </xsl:if>
+            <xsl:if test="@Status = 'notrun'"> <!-- notrun: reported as skipped, with the text of Results/Measurement/Value as body -->
+                <skipped><xsl:value-of select="Results/Measurement/Value/text()"/></skipped>
+            </xsl:if>
+        </testcase>
+    </xsl:template>
+</xsl:stylesheet>
diff --git a/.github/workflows/windows_release_gcc_mingw.yml b/.github/workflows/windows_release_gcc_mingw.yml
index 97bb5685f0c3..b36c02773eda 100644
--- a/.github/workflows/windows_release_gcc_mingw.yml
+++ b/.github/workflows/windows_release_gcc_mingw.yml
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 The STE||AR-Group
+# Copyright (c) 2023-2024 The STE||AR-Group
 #
 # SPDX-License-Identifier: BSL-1.0
 # Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -22,7 +22,7 @@ jobs:
         choco install ninja -y
         md C:\projects
         $client = new-object System.Net.WebClient
-        $client.DownloadFile("https://master.dl.sourceforge.net/project/boost/boost/1.78.0/boost_1_78_0.7z","C:\projects\boost_1_78_0.7z")
+        $client.DownloadFile("https://archives.boost.io/release/1.78.0/source/boost_1_78_0.7z","C:\projects\boost_1_78_0.7z")
         7z x C:\projects\boost_1_78_0.7z -y -oC:\projects\boost
         cd C:\projects\boost\boost_1_78_0
         .\bootstrap.bat gcc
diff --git a/cmake/HPX_SetupBoost.cmake b/cmake/HPX_SetupBoost.cmake
index 43360e5571db..256b56e169f8 100644
--- a/cmake/HPX_SetupBoost.cmake
+++ b/cmake/HPX_SetupBoost.cmake
@@ -117,13 +117,6 @@ if(NOT TARGET hpx_dependencies_boost)
   endif()
 
   set(__boost_libraries "")
-  if(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING
-     OR HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE
-  )
-    set(__boost_libraries ${__boost_libraries} log log_setup date_time chrono
-                          thread
-    )
-  endif()
 
   if(HPX_WITH_GENERIC_CONTEXT_COROUTINES)
     # if context is needed, we should still link with boost thread and chrono
diff --git a/cmake/toolchains/Cray.cmake b/cmake/toolchains/Cray.cmake
index 83b9c051f133..e2f369f063ff 100644
--- a/cmake/toolchains/Cray.cmake
+++ b/cmake/toolchains/Cray.cmake
@@ -70,27 +70,6 @@ set(HPX_WITH_PARCELPORT_MPI_MULTITHREADED
     CACHE BOOL ""
 )
 
-set(HPX_WITH_PARCELPORT_LIBFABRIC
-    ON
-    CACHE BOOL ""
-)
-set(HPX_PARCELPORT_LIBFABRIC_PROVIDER
-    "gni"
-    CACHE STRING "See libfabric docs for details, gni,verbs,psm2 etc etc"
-)
-set(HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS
-    "256"
-    CACHE STRING "Max number of messages in flight at once"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE
-    OFF
-    CACHE BOOL "Custom libfabric logging flag"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING
-    OFF
-    CACHE BOOL "Libfabric parcelport logging on/off flag"
-)
-
 # We do a cross compilation here ...
 set(CMAKE_CROSSCOMPILING
     ON
diff --git a/cmake/toolchains/CrayKNL.cmake b/cmake/toolchains/CrayKNL.cmake
index 126bcc9a0385..17d06245d37f 100644
--- a/cmake/toolchains/CrayKNL.cmake
+++ b/cmake/toolchains/CrayKNL.cmake
@@ -68,27 +68,6 @@ set(HPX_WITH_PARCELPORT_MPI_MULTITHREADED
     CACHE BOOL ""
 )
 
-set(HPX_WITH_PARCELPORT_LIBFABRIC
-    ON
-    CACHE BOOL ""
-)
-set(HPX_PARCELPORT_LIBFABRIC_PROVIDER
-    "gni"
-    CACHE STRING "See libfabric docs for details, gni,verbs,psm2 etc etc"
-)
-set(HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS
-    "256"
-    CACHE STRING "Max number of messages in flight at once"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE
-    OFF
-    CACHE BOOL "Custom libfabric logging flag"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING
-    OFF
-    CACHE BOOL "Libfabric parcelport logging on/off flag"
-)
-
 # Set the TBBMALLOC_PLATFORM correctly so that find_package(TBBMalloc) sets the
 # right hints
 set(TBBMALLOC_PLATFORM
diff --git a/cmake/toolchains/CrayKNLStatic.cmake b/cmake/toolchains/CrayKNLStatic.cmake
index 97843059eaa7..76e6160ba239 100644
--- a/cmake/toolchains/CrayKNLStatic.cmake
+++ b/cmake/toolchains/CrayKNLStatic.cmake
@@ -52,27 +52,6 @@ set(HPX_WITH_PARCELPORT_MPI_MULTITHREADED
     CACHE BOOL ""
 )
 
-set(HPX_WITH_PARCELPORT_LIBFABRIC
-    ON
-    CACHE BOOL ""
-)
-set(HPX_PARCELPORT_LIBFABRIC_PROVIDER
-    "gni"
-    CACHE STRING "See libfabric docs for details, gni,verbs,psm2 etc etc"
-)
-set(HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS
-    "256"
-    CACHE STRING "Max number of messages in flight at once"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE
-    OFF
-    CACHE BOOL "Custom libfabric logging flag"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING
-    OFF
-    CACHE BOOL "Libfabric parcelport logging on/off flag"
-)
-
 # Set the TBBMALLOC_PLATFORM correctly so that find_package(TBBMalloc) sets the
 # right hints
 set(TBBMALLOC_PLATFORM
diff --git a/cmake/toolchains/CrayStatic.cmake b/cmake/toolchains/CrayStatic.cmake
index f89757a2e72c..6d1bc2061085 100644
--- a/cmake/toolchains/CrayStatic.cmake
+++ b/cmake/toolchains/CrayStatic.cmake
@@ -62,24 +62,3 @@ set(HPX_WITH_PARCELPORT_MPI_MULTITHREADED
     ON
     CACHE BOOL ""
 )
-
-set(HPX_WITH_PARCELPORT_LIBFABRIC
-    ON
-    CACHE BOOL ""
-)
-set(HPX_PARCELPORT_LIBFABRIC_PROVIDER
-    "gni"
-    CACHE STRING "See libfabric docs for details, gni,verbs,psm2 etc etc"
-)
-set(HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS
-    "256"
-    CACHE STRING "Max number of messages in flight at once"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE
-    OFF
-    CACHE BOOL "Custom libfabric logging flag"
-)
-set(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING
-    OFF
-    CACHE BOOL "Libfabric parcelport logging on/off flag"
-)
diff --git a/libs/core/algorithms/CMakeLists.txt b/libs/core/algorithms/CMakeLists.txt
index 6fcfed897e2f..9090345722df 100644
--- a/libs/core/algorithms/CMakeLists.txt
+++ b/libs/core/algorithms/CMakeLists.txt
@@ -37,7 +37,9 @@ set(algorithms_headers
     hpx/parallel/algorithms/detail/parallel_stable_sort.hpp
     hpx/parallel/algorithms/detail/pivot.hpp
     hpx/parallel/algorithms/detail/reduce.hpp
+    hpx/parallel/algorithms/detail/reduce_deterministic.hpp
     hpx/parallel/algorithms/detail/replace.hpp
+    hpx/parallel/algorithms/detail/rfa.hpp
     hpx/parallel/algorithms/detail/rotate.hpp
     hpx/parallel/algorithms/detail/sample_sort.hpp
     hpx/parallel/algorithms/detail/search.hpp
@@ -72,6 +74,7 @@ set(algorithms_headers
     hpx/parallel/algorithms/partition.hpp
     hpx/parallel/algorithms/reduce_by_key.hpp
     hpx/parallel/algorithms/reduce.hpp
+    hpx/parallel/algorithms/reduce_deterministic.hpp
     hpx/parallel/algorithms/remove_copy.hpp
     hpx/parallel/algorithms/remove.hpp
     hpx/parallel/algorithms/replace.hpp
diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp
index b2de030eed8b..b37730889172 100644
--- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp
+++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/reduce_deterministic.hpp
@@ -11,9 +11,9 @@
 #include <hpx/functional/invoke.hpp>
 #include <hpx/parallel/algorithms/detail/rfa.hpp>
 #include <hpx/parallel/util/loop.hpp>
-#include <hpx/type_support/pack.hpp>
 
 #include <cstddef>
+#include <cstring>
 #include <limits>
 #include <type_traits>
 #include <utility>
@@ -33,9 +33,11 @@ namespace hpx::parallel::detail {
             sequential_reduce_deterministic_t, ExPolicy&&, InIterB first,
             InIterE last, T init, Reduce&& r)
         {
+            /// TODO: Put constraint on Reduce to be a binary plus operator
+            (void) r;
             hpx::parallel::detail::rfa::RFA_bins<T> bins;
             bins.initialize_bins();
-            std::memcpy(rfa::bin_host_buffer, &bins, sizeof(bins));
+            std::memcpy(rfa::__rfa_bin_host_buffer__, &bins, sizeof(bins));
 
             hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<T> rfa;
             rfa.set_max_abs_val(init);
@@ -63,84 +65,6 @@ namespace hpx::parallel::detail {
         }
     };
 
-    template <typename ExPolicy>
-    struct sequential_reduce_deterministic_rfa_t final
-      : hpx::functional::detail::tag_fallback<
-            sequential_reduce_deterministic_rfa_t<ExPolicy>>
-    {
-    private:
-        template <typename InIterB, typename InIterE, typename T,
-            typename Reduce>
-        friend constexpr hpx::parallel::detail::rfa::
-            ReproducibleFloatingAccumulator<T>
-            tag_fallback_invoke(sequential_reduce_deterministic_rfa_t,
-                ExPolicy&&, InIterB first, InIterE last, T init, Reduce&& r)
-        {
-            hpx::parallel::detail::rfa::RFA_bins<T> bins;
-            bins.initialize_bins();
-            std::memcpy(rfa::bin_host_buffer, &bins, sizeof(bins));
-
-            hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<T> rfa;
-
-            for (auto e = first; e != last; ++e)
-            {
-                rfa += *e;
-            }
-            return rfa;
-        }
-
-        template <typename InIterB, typename T, typename Reduce>
-        friend constexpr hpx::parallel::detail::rfa::
-            ReproducibleFloatingAccumulator<T>
-            tag_fallback_invoke(sequential_reduce_deterministic_rfa_t,
-                ExPolicy&&, InIterB first, std::size_t size, T init, Reduce&& r)
-        {
-            hpx::parallel::detail::rfa::RFA_bins<T> bins;
-            bins.initialize_bins();
-            std::memcpy(rfa::bin_host_buffer, &bins, sizeof(bins));
-
-            hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<T> rfa;
-            auto e = first;
-            for (std::size_t i = 0; i < size; ++i, ++e)
-            {
-                rfa += *e;
-            }
-            return rfa;
-        }
-
-        // template <typename InIterB, typename InIterE, typename T,
-        //     typename Reduce
-        //     // typename = std::enable_if_t<hpx::util::contains<T,
-        //     //     hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<
-        //     //         float>,
-        //     //     hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<
-        //     //         double>>::value>
-        //     >
-        // friend constexpr T tag_fallback_invoke(
-        //     sequential_reduce_deterministic_rfa_t, ExPolicy&&, InIterB first,
-        //     InIterE last, T init, Reduce&& r)
-        // {
-        //     static_assert(hpx::util::contains<T,
-        //         hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<
-        //             float>,
-        //         hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<
-        //             double>>::value);
-        //     hpx::parallel::detail::rfa::RFA_bins<T> bins;
-        //     bins.initialize_bins();
-        //     std::memcpy(rfa::bin_host_buffer, &bins, sizeof(bins));
-
-        //     hpx::parallel::detail::rfa::ReproducibleFloatingAccumulator<T> rfa;
-        //     rfa.set_max_abs_val(init);
-        //     rfa.unsafe_add(init);
-        //     rfa.renorm();
-        //     for (auto e = first; e != last; ++e)
-        //     {
-        //         rfa += *e;
-        //     }
-        //     return rfa.conv();
-        // }
-    };
-
 #if !defined(HPX_COMPUTE_DEVICE_CODE)
     template <typename ExPolicy>
     inline constexpr sequential_reduce_deterministic_t<ExPolicy>
@@ -156,18 +80,4 @@ namespace hpx::parallel::detail {
     }
 #endif
 
-#if !defined(HPX_COMPUTE_DEVICE_CODE)
-    template <typename ExPolicy>
-    inline constexpr sequential_reduce_deterministic_rfa_t<ExPolicy>
-        sequential_reduce_deterministic_rfa =
-            sequential_reduce_deterministic_rfa_t<ExPolicy>{};
-#else
-    template <typename ExPolicy, typename... Args>
-    HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_reduce_deterministic_rfa(
-        Args&&... args)
-    {
-        return sequential_reduce_deterministic_rfa_t<ExPolicy>{}(
-            std::forward<Args>(args)...);
-    }
-#endif
 }    // namespace hpx::parallel::detail
diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp
index 4a9910cb6faa..fa9142cdf80b 100644
--- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp
+++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp
@@ -1,3 +1,34 @@
+//  Copyright (c) 2024 Shreyas Atre
+//
+//  SPDX-License-Identifier: BSL-1.0
+//  Distributed under the Boost Software License, Version 1.0. (See accompanying
+//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// ---------------------------------------------------------------------------
+// This file has been taken from
+// https://github.com/maddyscientist/reproducible_floating_sums commit
+// b5a065741d4ea459437ca004b508de9dcb6a3e52. The Boost copyright has been added
+// to this file in conformance with the HPX policy and in accordance with the
+// dual license terms of the Reproducible Floating-Point Summations project:
+// https://github.com/maddyscientist/reproducible_floating_sums/blob/feature/cuda/LICENSE.md
+// ---------------------------------------------------------------------------
+//
+/// Copyright 2022 Richard Barnes, Peter Ahrens, James Demmel
+/// Permission is hereby granted, free of charge, to any person obtaining a copy
+/// of this software and associated documentation files (the "Software"), to deal
+/// in the Software without restriction, including without limitation the rights
+/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+/// copies of the Software, and to permit persons to whom the Software is
+/// furnished to do so, subject to the following conditions:
+/// The above copyright notice and this permission notice shall be included in
+/// all copies or substantial portions of the Software.
+/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+/// SOFTWARE.
 //Reproducible Floating Point Accumulations via Binned Floating Point
 //Adapted to C++ by Richard Barnes from ReproBLAS v2.1.0.
 //ReproBLAS by Peter Ahrens, Hong Diep Nguyen, and James Demmel.
@@ -26,6 +57,10 @@
 #include <cmath>
 #include <cstdint>
 #include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <hpx/config.hpp>
 
 namespace hpx::parallel::detail::rfa {
     template <typename F>
@@ -163,7 +198,10 @@ namespace hpx::parallel::detail::rfa {
         }
     };
 
-    static char bin_host_buffer[sizeof(RFA_bins<double>)];
+    // Raw storage for the bin table. It is filled with memcpy and read back
+    // through a reinterpret_cast to RFA_bins<T>&, so it must be aligned for it.
+    alignas(RFA_bins<double>) static char
+        __rfa_bin_host_buffer__[sizeof(RFA_bins<double>)];
 
     ///Class to hold a reproducible summation of the numbers passed to it
     ///
@@ -179,7 +214,7 @@ namespace hpx::parallel::detail::rfa {
         static constexpr int FOLD = FOLD_;
 
     private:
-        std::array<ftype, 2 * FOLD> data = {0};
+        std::array<ftype, 2 * FOLD> data = {{0}};
 
         ///Floating-point precision bin width
         static constexpr auto BIN_WIDTH =
@@ -214,7 +249,8 @@ namespace hpx::parallel::detail::rfa {
         ///Return a binned floating-point reference bin
         inline const ftype* binned_bins(const int x) const
         {
-            return &reinterpret_cast<RFA_bins<ftype>&>(bin_host_buffer)[x];
+            return &reinterpret_cast<RFA_bins<ftype>&>(
+                __rfa_bin_host_buffer__)[x];
         }
 
         ///Get the bit representation of a float
@@ -350,21 +386,21 @@ namespace hpx::parallel::detail::rfa {
 
         ///Get index of float-point precision
         ///The index of a non-binned type is the smallest index a binned type would
-        ///need to have to sum it reproducibly. Higher indicies correspond to smaller
+        ///need to have to sum it reproducibly. Higher indices correspond to smaller
         ///bins.
         static inline constexpr int binned_dindex(const ftype x)
         {
             int exp = EXP(x);
             if (exp == 0)
             {
-                if (x == 0.0)
+                if (x == static_cast<ftype>(0.0))
                 {
                     return MAXINDEX;
                 }
                 else
                 {
                     std::frexp(x, &exp);
-                    return std::max((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX);
+                    return (std::max)((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX);
                 }
             }
             return ((MAX_EXP + EXP_BIAS) - exp) / BIN_WIDTH;
@@ -372,7 +408,7 @@ namespace hpx::parallel::detail::rfa {
 
         ///Get index of manually specified binned double precision
         ///The index of a binned type is the bin that it corresponds to. Higher
-        ///indicies correspond to smaller bins.
+        ///indices correspond to smaller bins.
         inline int binned_index() const
         {
             return ((MAX_EXP + MANT_DIG - BIN_WIDTH + 1 + EXP_BIAS) -
@@ -415,7 +451,9 @@ namespace hpx::parallel::detail::rfa {
                 int shift = binned_index() - X_index;
                 if (shift > 0)
                 {
-#pragma unroll
+#if !defined(HPX_CLANG_VERSION)
+                    HPX_UNROLL
+#endif
                     for (int i = FOLD - 1; i >= 1; i--)
                     {
                         if (i < shift)
@@ -424,7 +462,9 @@ namespace hpx::parallel::detail::rfa {
                         carry(i * inccarY) = carry((i - shift) * inccarY);
                     }
                     const ftype* const bins = binned_bins(X_index);
-#pragma unroll
+#if !defined(HPX_CLANG_VERSION)
+                    HPX_UNROLL
+#endif
                     for (int j = 0; j < FOLD; j++)
                     {
                         if (j >= shift)
@@ -456,16 +496,19 @@ namespace hpx::parallel::detail::rfa {
             if (binned_index0())
             {
                 M = primary(0);
-                ftype qd = x * COMPRESSION;
+                ftype qd = x * static_cast<ftype>(COMPRESSION);
                 auto& ql = get_bits(qd);
                 ql |= 1;
                 qd += M;
                 primary(0) = qd;
                 M -= qd;
-                M *= EXPANSION * 0.5;
+                auto temp_m = (double) (((double) EXPANSION) * 0.5);
+                M *= static_cast<ftype>(temp_m);
                 x += M;
                 x += M;
-#pragma unroll
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = 1; i < FOLD - 1; i++)
                 {
                     M = primary(i * incpriY);
@@ -484,7 +527,9 @@ namespace hpx::parallel::detail::rfa {
             {
                 ftype qd = x;
                 auto& ql = get_bits(qd);
-#pragma unroll
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = 0; i < FOLD - 1; i++)
                 {
                     M = primary(i * incpriY);
@@ -549,7 +594,7 @@ namespace hpx::parallel::detail::rfa {
             int i = 0;
 
             if (ISNANINF(primary(0)))
-                return primary(0);
+                return (double) primary(0);
             if (ISZERO(primary(0)))
                 return 0.0;
 
@@ -563,29 +608,36 @@ namespace hpx::parallel::detail::rfa {
             {
                 scale_down = std::ldexp(0.5, 1 - (2 * MANT_DIG - BIN_WIDTH));
                 scale_up = std::ldexp(0.5, 1 + (2 * MANT_DIG - BIN_WIDTH));
-                scaled = std::max(
-                    std::min(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0);
+                scaled = (std::max)(
+                    (std::min)(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0);
                 if (X_index == 0)
                 {
-                    Y += carry(0) * ((bins[0] / 6.0) * scale_down * EXPANSION);
-                    Y += carry(inccarX) * ((bins[1] / 6.0) * scale_down);
-                    Y += (primary(0) - bins[0]) * scale_down * EXPANSION;
+                    Y += ((double) carry(0)) *
+                        ((((double) bins[0]) / 6.0) * scale_down * EXPANSION);
+                    Y += ((double) carry(inccarX)) *
+                        ((((double) bins[1]) / 6.0) * scale_down);
+                    Y += ((double) primary(0) - (double) bins[0]) * scale_down *
+                        EXPANSION;
                     i = 2;
                 }
                 else
                 {
-                    Y += carry(0) * ((bins[0] / 6.0) * scale_down);
+                    Y += ((double) carry(0)) *
+                        (((double) bins[0] / 6.0) * scale_down);
                     i = 1;
                 }
                 for (; i < scaled; i++)
                 {
-                    Y += carry(i * inccarX) * ((bins[i] / 6.0) * scale_down);
-                    Y +=
-                        (primary((i - 1) * incpriX) - bins[i - 1]) * scale_down;
+                    Y += ((double) carry(i * inccarX)) *
+                        (((double) bins[i] / 6.0) * scale_down);
+                    Y += ((double) primary((i - 1) * incpriX) -
+                             (double) (bins[i - 1])) *
+                        scale_down;
                 }
                 if (i == FOLD)
                 {
-                    Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]) *
+                    Y += ((double) primary((FOLD - 1) * incpriX) -
+                             (double) (bins[FOLD - 1])) *
                         scale_down;
                     return Y * scale_up;
                 }
@@ -596,20 +648,23 @@ namespace hpx::parallel::detail::rfa {
                 Y *= scale_up;
                 for (; i < FOLD; i++)
                 {
-                    Y += carry(i * inccarX) * (bins[i] / 6.0);
-                    Y += primary((i - 1) * incpriX) - bins[i - 1];
+                    Y += ((double) carry(i * inccarX)) *
+                        ((double) bins[i] / 6.0);
+                    Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
                 }
-                Y += primary((FOLD - 1) * incpriX) - bins[FOLD - 1];
+                Y += ((double) primary((FOLD - 1) * incpriX) -
+                    ((double) bins[FOLD - 1]));
             }
             else
             {
-                Y += carry(0) * (bins[0] / 6.0);
+                Y += ((double) carry(0)) * ((double) bins[0] / 6.0);
                 for (i = 1; i < FOLD; i++)
                 {
-                    Y += carry(i * inccarX) * (bins[i] / 6.0);
-                    Y += (primary((i - 1) * incpriX) - bins[i - 1]);
+                    Y += ((double) carry(i * inccarX)) *
+                        ((double) bins[i] / 6.0);
+                    Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
                 }
-                Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
+                Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
             }
             return Y;
         }
@@ -626,7 +681,7 @@ namespace hpx::parallel::detail::rfa {
             if (ISNANINF(primary(0)))
                 return primary(0);
             if (ISZERO(primary(0)))
-                return 0.0;
+                return 0.0f;
 
             //Note that the following order of summation is in order of decreasing
             //exponent. The following code is specific to SBWIDTH=13, FLT_MANT_DIG=24, and
@@ -635,20 +690,22 @@ namespace hpx::parallel::detail::rfa {
             const auto* const bins = binned_bins(X_index);
             if (X_index == 0)
             {
-                Y += (double) carry(0) * (double) (bins[0] / 6.0) *
+                Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0) *
                     (double) EXPANSION;
-                Y += (double) carry(inccarX) * (double) (bins[1] / 6.0);
+                Y += (double) carry(inccarX) *
+                    (double) (((double) bins[1]) / 6.0);
                 Y += (double) (primary(0) - bins[0]) * (double) EXPANSION;
                 i = 2;
             }
             else
             {
-                Y += (double) carry(0) * (double) (bins[0] / 6.0);
+                Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0);
                 i = 1;
             }
             for (; i < FOLD; i++)
             {
-                Y += (double) carry(i * inccarX) * (double) (bins[i] / 6.0);
+                Y += (double) carry(i * inccarX) *
+                    (double) (((double) bins[i]) / 6.0);
                 Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
             }
             Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
@@ -693,8 +750,10 @@ namespace hpx::parallel::detail::rfa {
             if (shift > 0)
             {
                 const auto* const bins = binned_bins(Y_index);
-                //shift Y upwards and add X to Y
-#pragma unroll
+//shift Y upwards and add X to Y
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = FOLD - 1; i >= 1; i--)
                 {
                     if (i < shift)
@@ -704,7 +763,9 @@ namespace hpx::parallel::detail::rfa {
                     carry(i * inccarY) =
                         x.carry(i * inccarX) + carry((i - shift) * inccarY);
                 }
-#pragma unroll
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = 0; i < FOLD; i++)
                 {
                     if (i == shift)
@@ -716,8 +777,10 @@ namespace hpx::parallel::detail::rfa {
             else if (shift < 0)
             {
                 const auto* const bins = binned_bins(X_index);
-                //shift X upwards and add X to Y
-#pragma unroll
+//shift X upwards and add X to Y
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = 0; i < FOLD; i++)
                 {
                     if (i < -shift)
@@ -730,8 +793,10 @@ namespace hpx::parallel::detail::rfa {
             else if (shift == 0)
             {
                 const auto* const bins = binned_bins(X_index);
-                // add X to Y
-#pragma unroll
+// add X to Y
+#if !defined(HPX_CLANG_VERSION)
+                HPX_UNROLL
+#endif
                 for (int i = 0; i < FOLD; i++)
                 {
                     primary(i * incpriY) += x.primary(i * incpriX) - bins[i];
@@ -770,7 +835,7 @@ namespace hpx::parallel::detail::rfa {
         }
 
         ///Return the endurance of the binned fp
-        constexpr int endurance() const
+        constexpr size_t endurance() const
         {
             return ENDURANCE;
         }
@@ -866,11 +931,11 @@ namespace hpx::parallel::detail::rfa {
         {
             if (std::is_same_v<ftype, float>)
             {
-                return binned_conv_single(1, 1);
+                return static_cast<ftype>(binned_conv_single(1, 1));
             }
             else
             {
-                return binned_conv_double(1, 1);
+                return static_cast<ftype>(binned_conv_double(1, 1));
             }
         }
 
@@ -887,7 +952,8 @@ namespace hpx::parallel::detail::rfa {
         {
             const double X = std::abs(max_abs_val);
             const double S = std::abs(binned_sum);
-            return static_cast<ftype>(max(X, std::ldexp(0.5, MIN_EXP - 1)) *
+            return static_cast<ftype>(
+                (std::max)(X, std::ldexp(0.5, MIN_EXP - 1)) *
                     std::ldexp(0.5, (1 - FOLD) * BIN_WIDTH + 1) * N +
                 ((7.0 * EPSILON) /
                     (1.0 - 6.0 * std::sqrt(static_cast<double>(EPSILON)) -
@@ -972,7 +1038,7 @@ namespace hpx::parallel::detail::rfa {
             T max_abs_val = input[0];
             for (size_t i = 0; i < N; i++)
             {
-                max_abs_val = max(max_abs_val, std::abs(input[i]));
+                max_abs_val = (std::max)(max_abs_val, std::abs(input[i]));
             }
             add(input, N, max_abs_val);
         }
@@ -1141,4 +1207,4 @@ namespace hpx::parallel::detail::rfa {
         }
     };
 
-}    // namespace hpx::parallel::detail::rfa
\ No newline at end of file
+}    // namespace hpx::parallel::detail::rfa
diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/reduce_deterministic.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/reduce_deterministic.hpp
index b8435eec9a02..996865d519c5 100644
--- a/libs/core/algorithms/include/hpx/parallel/algorithms/reduce_deterministic.hpp
+++ b/libs/core/algorithms/include/hpx/parallel/algorithms/reduce_deterministic.hpp
@@ -10,7 +10,6 @@
 
 #pragma once
 
-#include "detail/reduce_deterministic.hpp"
 #if defined(DOXYGEN)
 
 namespace hpx {
@@ -420,7 +419,7 @@ namespace hpx::parallel {
                         ReproducibleFloatingAccumulator<T_> {
                             T val = *part_begin;
                             return hpx::parallel::detail::
-                                sequential_reduce_deterministic_rfa<ExPolicy>(
+                                sequential_reduce_deterministic<ExPolicy>(
                                     HPX_FORWARD(ExPolicy, policy), ++part_begin,
                                     --part_size, HPX_MOVE(val), r);
                         };
@@ -433,7 +432,7 @@ namespace hpx::parallel {
                                         r = HPX_FORWARD(Reduce, r),
                                         policy](auto&& results) -> T {
                         return hpx::parallel::detail::
-                            sequential_reduce_deterministic_rfa<ExPolicy>(
+                            sequential_reduce_deterministic<ExPolicy>(
                                 HPX_FORWARD(ExPolicy, policy),
                                 hpx::util::begin(results),
                                 hpx::util::size(results), init, r)
diff --git a/libs/core/algorithms/tests/performance/CMakeLists.txt b/libs/core/algorithms/tests/performance/CMakeLists.txt
index d74788a9b47f..96ce826dc742 100644
--- a/libs/core/algorithms/tests/performance/CMakeLists.txt
+++ b/libs/core/algorithms/tests/performance/CMakeLists.txt
@@ -16,6 +16,7 @@ set(benchmarks
     benchmark_partial_sort_parallel
     benchmark_partition
     benchmark_partition_copy
+    benchmark_reduce_deterministic
     benchmark_remove
     benchmark_remove_if
     benchmark_scan_algorithms
diff --git a/libs/core/algorithms/tests/performance/benchmark_reduce_deterministic.cpp b/libs/core/algorithms/tests/performance/benchmark_reduce_deterministic.cpp
new file mode 100644
index 000000000000..daaee2b1269b
--- /dev/null
+++ b/libs/core/algorithms/tests/performance/benchmark_reduce_deterministic.cpp
@@ -0,0 +1,148 @@
+//  Copyright (c) 2024 Shreyas Atre
+//
+//  SPDX-License-Identifier: BSL-1.0
+//  Distributed under the Boost Software License, Version 1.0. (See accompanying
+//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <hpx/config.hpp>
+
+#if !defined(HPX_COMPUTE_DEVICE_CODE)
+#include <hpx/algorithm.hpp>
+#include <hpx/chrono.hpp>
+#include <hpx/execution.hpp>
+#include <hpx/init.hpp>
+#include <hpx/modules/testing.hpp>
+#include <hpx/parallel/algorithms/reduce.hpp>
+#include <hpx/parallel/algorithms/reduce_deterministic.hpp>
+
+#include <numeric>
+#include <random>
+#include <vector>
+
+int seed = 1000;
+std::mt19937 gen(seed);
+
+template <typename T>
+T get_rand(T LO = (std::numeric_limits<T>::min)(),
+    T HI = (std::numeric_limits<T>::max)())
+{
+    // Map std::rand() from [0, RAND_MAX] linearly onto [LO, HI].
+    T const scale = static_cast<T>(RAND_MAX) / (HI - LO);
+    return LO + static_cast<T>(std::rand()) / scale;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename Range, typename T, typename Op>
+void bench_reduce_deterministic(
+    Range const& deterministic_shuffled, T const& val_det, Op const& op)
+{
+    // One benchmark iteration: run hpx::reduce_deterministic (no execution
+    // policy) over the shuffled input, starting from val_det and using op.
+    // Only the run time is of interest, so the result is discarded.
+    auto r1_shuffled = hpx::reduce_deterministic(
+        std::begin(deterministic_shuffled), std::end(deterministic_shuffled),
+        val_det, op);
+    HPX_UNUSED(r1_shuffled);
+}
+
+template <typename Policy, typename Range, typename T, typename Op>
+void bench_reduce(Policy const& policy, Range const& deterministic_shuffled,
+    T const& val_det, Op const& op)
+{
+    auto r = hpx::reduce(policy, std::begin(deterministic_shuffled),
+        std::end(deterministic_shuffled), val_det, op);
+    HPX_UNUSED(r);    // only the run time matters; the result is discarded
+}
+
+//////////////////////////////////////////////////////////////////////////////
+int hpx_main(hpx::program_options::variables_map& vm)
+{
+    // std::rand() is the source get_rand() draws from; seed it up front.
+    std::srand(seed);
+
+    int const test_count = vm["test_count"].as<int>();
+
+    hpx::util::perftests_init(vm);
+
+    // A non-positive repetition count is outside the program's domain.
+    if (test_count <= 0)
+    {
+        std::cerr << "test_count cannot be zero or negative...\n" << std::flush;
+        hpx::local::finalize();
+        return -1;
+    }
+
+    {
+        using FloatTypeDeterministic = float;
+        constexpr std::size_t LEN = 10000;
+
+        // Input values are drawn from [-num_bounds_det, num_bounds_det].
+        constexpr FloatTypeDeterministic num_bounds_det =
+            std::is_same_v<FloatTypeDeterministic, float> ? 1000.0 : 1000000.0;
+
+        std::vector<FloatTypeDeterministic> deterministic(LEN);
+        for (auto& value : deterministic)
+        {
+            value = get_rand<FloatTypeDeterministic>(
+                -num_bounds_det, num_bounds_det);
+        }
+
+        // The benchmarks operate on a shuffled copy of the generated data.
+        auto deterministic_shuffled = deterministic;
+        std::shuffle(
+            deterministic_shuffled.begin(), deterministic_shuffled.end(), gen);
+
+        // Initial value and binary operation handed to every reduction.
+        FloatTypeDeterministic val_det(41.999);
+        auto op = [](FloatTypeDeterministic lhs, FloatTypeDeterministic rhs) {
+            return lhs + rhs;
+        };
+
+        // Benchmark hpx::reduce with the sequential policy.
+        hpx::util::perftests_report("reduce", "seq", test_count, [&]() {
+            bench_reduce(
+                hpx::execution::seq, deterministic_shuffled, val_det, op);
+        });
+
+        // Benchmark hpx::reduce with the parallel policy.
+        hpx::util::perftests_report("reduce", "par", test_count, [&]() {
+            bench_reduce(
+                hpx::execution::par, deterministic_shuffled, val_det, op);
+        });
+
+        // Benchmark hpx::reduce_deterministic (called without a policy).
+        hpx::util::perftests_report(
+            "reduce deterministic", "seq", test_count, [&]() {
+                bench_reduce_deterministic(
+                    deterministic_shuffled, val_det, op);
+            });
+
+        hpx::util::perftests_print_times();
+    }
+
+    return hpx::local::finalize();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+int main(int argc, char* argv[])
+{
+    namespace po = hpx::program_options;
+    // Declare the benchmark's own option before perftests_cfg() sees it.
+    po::options_description cmdline(
+        "usage: " HPX_APPLICATION_STRING " [options]");
+    // clang-format off
+    cmdline.add_options()
+        ("test_count", po::value<int>()->default_value(100),
+            "number of tests to be averaged")
+        ;
+    // clang-format on
+    hpx::util::perftests_cfg(cmdline);
+
+    // Hand the options to the runtime and request all OS threads.
+    hpx::local::init_params init_args;
+    init_args.cfg = {"hpx.os_threads=all"};
+    init_args.desc_cmdline = cmdline;
+    return hpx::local::init(hpx_main, argc, argv, init_args);
+}
+#endif
diff --git a/libs/core/algorithms/tests/unit/algorithms/CMakeLists.txt b/libs/core/algorithms/tests/unit/algorithms/CMakeLists.txt
index 76dc5fcd9806..559ee830030e 100644
--- a/libs/core/algorithms/tests/unit/algorithms/CMakeLists.txt
+++ b/libs/core/algorithms/tests/unit/algorithms/CMakeLists.txt
@@ -246,7 +246,3 @@ foreach(test ${tests})
     "modules.algorithms.algorithms" ${test} ${${test}_PARAMETERS}
   )
 endforeach()
-
-target_compile_options(reduce_deterministic_test PRIVATE -fsanitize=address)
-
-target_link_options(reduce_deterministic_test PRIVATE -fsanitize=address)
\ No newline at end of file
diff --git a/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp b/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp
index 694b50cfb76d..5a06c509efdc 100644
--- a/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp
+++ b/libs/core/algorithms/tests/unit/algorithms/reduce_deterministic.cpp
@@ -4,8 +4,6 @@
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#pragma once
-
 #include <hpx/init.hpp>
 #include <hpx/modules/testing.hpp>
 #include <hpx/parallel/algorithms/detail/rfa.hpp>
@@ -19,6 +17,7 @@
 #include <limits>
 #include <numeric>
 #include <random>
+#include <string>
 #include <vector>
 
 #include "test_utils.hpp"
@@ -27,11 +26,12 @@ int seed = std::random_device{}();
 std::mt19937 gen(seed);
 
 template <typename T>
-T get_rand(
-    T LO = std::numeric_limits<T>::min(), T HI = std::numeric_limits<T>::max())
+T get_rand(T LO = (std::numeric_limits<T>::min)(),
+    T HI = (std::numeric_limits<T>::max)())
 {
     return LO +
-        static_cast<T>(std::rand()) / (static_cast<T>(RAND_MAX / (HI - LO)));
+        static_cast<T>(std::rand()) /
+        (static_cast<T>(static_cast<T>((RAND_MAX)) / (HI - LO)));
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -42,10 +42,12 @@ void test_reduce1(IteratorTag)
 {
     // check if different type for deterministic and nondeeterministic
     // and same result i.e. correct computation
-    using base_iterator_det = std::vector<FloatTypeDeterministic>::iterator;
+    using base_iterator_det =
+        typename std::vector<FloatTypeDeterministic>::iterator;
     using iterator_det = test::test_iterator<base_iterator_det, IteratorTag>;
 
-    using base_iterator_ndet = std::vector<FloatTypeNonDeterministic>::iterator;
+    using base_iterator_ndet =
+        typename std::vector<FloatTypeNonDeterministic>::iterator;
     using iterator_ndet = test::test_iterator<base_iterator_ndet, IteratorTag>;
 
     std::vector<FloatTypeDeterministic> deterministic(LEN);
@@ -75,51 +77,8 @@ void test_reduce1(IteratorTag)
     FloatTypeNonDeterministic r3 = std::accumulate(
         nondeterministic.begin(), nondeterministic.end(), val_non_det);
 
-    HPX_TEST_EQ(r1, r3);
-    HPX_TEST_EQ(r2, r3);
-}
-
-template <typename IteratorTag, typename FloatTypeDeterministic,
-    typename FloatTypeNonDeterministic, size_t LEN = 10007>
-void test_reduce_parallel1(IteratorTag)
-{
-    // check if different type for deterministic and nondeeterministic
-    // and same result i.e. correct computation
-    using base_iterator_det = std::vector<FloatTypeDeterministic>::iterator;
-    using iterator_det = test::test_iterator<base_iterator_det, IteratorTag>;
-
-    using base_iterator_ndet = std::vector<FloatTypeNonDeterministic>::iterator;
-    using iterator_ndet = test::test_iterator<base_iterator_ndet, IteratorTag>;
-
-    std::vector<FloatTypeDeterministic> deterministic(LEN);
-    std::vector<FloatTypeNonDeterministic> nondeterministic(LEN);
-
-    std::iota(
-        deterministic.begin(), deterministic.end(), FloatTypeDeterministic(0));
-
-    std::iota(nondeterministic.begin(), nondeterministic.end(),
-        FloatTypeNonDeterministic(0));
-
-    FloatTypeDeterministic val_det(0);
-    FloatTypeNonDeterministic val_non_det(0);
-    auto op = [](FloatTypeNonDeterministic v1, FloatTypeNonDeterministic v2) {
-        return v1 + v2;
-    };
-
-    FloatTypeDeterministic r1 = hpx::reduce_deterministic(hpx::execution::par,
-        iterator_det(std::begin(deterministic)),
-        iterator_det(std::end(deterministic)), val_det, op);
-
-    // verify values
-    // FloatTypeNonDeterministic r2 = hpx::reduce(hpx::execution::par,
-    //     iterator_ndet(std::begin(nondeterministic)),
-    //     iterator_ndet(std::end(nondeterministic)), val_non_det, op);
-
-    FloatTypeNonDeterministic r3 = std::accumulate(
-        nondeterministic.begin(), nondeterministic.end(), val_non_det);
-
-    HPX_TEST_EQ(r1, r3);
-    // HPX_TEST_EQ(r2, r3);
+    HPX_TEST_EQ(static_cast<FloatTypeNonDeterministic>(r1), r3);
+    HPX_TEST_EQ(static_cast<FloatTypeNonDeterministic>(r2), r3);
 }
 
 template <typename IteratorTag, typename FloatTypeDeterministic,
@@ -128,11 +87,12 @@ void test_reduce_determinism(IteratorTag)
 {
     // check if different type for deterministic and nondeeterministic
     // and same result
-    using base_iterator_det = std::vector<FloatTypeDeterministic>::iterator;
+    using base_iterator_det =
+        typename std::vector<FloatTypeDeterministic>::iterator;
     using iterator_det = test::test_iterator<base_iterator_det, IteratorTag>;
 
-    constexpr auto num_bounds_det =
-        std::is_same_v<FloatTypeDeterministic, float> ? 1000.0f : 1000000.0f;
+    constexpr FloatTypeDeterministic num_bounds_det =
+        std::is_same_v<FloatTypeDeterministic, float> ? 1000.0 : 1000000.0;
 
     std::vector<FloatTypeDeterministic> deterministic(LEN);
 
@@ -165,11 +125,15 @@ void test_reduce_determinism(IteratorTag)
         r1_shuffled);    // Deterministically calculated, should always satisfy
 }
 
+/// This test function is never called because it is not guaranteed to pass.
+/// It exists to demonstrate that floating-point summation is not always
+/// associative, i.e. (a + b) + c may differ from (a + c) + b.
 template <typename IteratorTag, typename FloatTypeNonDeterministic,
     size_t LEN = 10007>
 void test_orig_reduce_determinism(IteratorTag)
 {
-    using base_iterator_ndet = std::vector<FloatTypeNonDeterministic>::iterator;
+    using base_iterator_ndet =
+        typename std::vector<FloatTypeNonDeterministic>::iterator;
     using iterator_ndet = test::test_iterator<base_iterator_ndet, IteratorTag>;
 
     constexpr auto num_bounds_ndet =
@@ -221,7 +185,6 @@ void test_reduce1()
     test_reduce1<IteratorTag, double, float, 1000>(IteratorTag());
     test_reduce1<IteratorTag, float, double, 1000>(IteratorTag());
     test_reduce1<IteratorTag, double, double, 1000>(IteratorTag());
-    test_reduce_parallel1<IteratorTag, float, float, 1000>(IteratorTag());
 }
 
 template <typename IteratorTag>
@@ -233,21 +196,22 @@ void test_reduce2()
     test_reduce_determinism<IteratorTag, double, 1000>(IteratorTag());
 }
 
-template <typename IteratorTag>
-void test_reduce3()
-{
-    using namespace hpx::execution;
+// template <typename IteratorTag>
+// void test_reduce3()
+// {
+//     using namespace hpx::execution;
 
-    test_orig_reduce_determinism<IteratorTag, float, 1000>(IteratorTag());
-    test_orig_reduce_determinism<IteratorTag, double, 1000>(IteratorTag());
-}
+//     test_orig_reduce_determinism<IteratorTag, float, 1000>(IteratorTag());
+//     test_orig_reduce_determinism<IteratorTag, double, 1000>(IteratorTag());
+// }
 
 void reduce_test1()
 {
     test_reduce1<std::random_access_iterator_tag>();
     test_reduce2<std::random_access_iterator_tag>();
-    test_reduce3<std::random_access_iterator_tag>();
-    // test_reduce1<std::forward_iterator_tag>();
+    // test_reduce3<std::random_access_iterator_tag>();
+    test_reduce1<std::forward_iterator_tag>();
+    test_reduce2<std::forward_iterator_tag>();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/libs/core/concurrency/tests/unit/tagged_ptr.cpp b/libs/core/concurrency/tests/unit/tagged_ptr.cpp
index d86fc5775415..b29652a3ede1 100644
--- a/libs/core/concurrency/tests/unit/tagged_ptr.cpp
+++ b/libs/core/concurrency/tests/unit/tagged_ptr.cpp
@@ -25,7 +25,7 @@ void tagged_ptr_test()
         i = j;
 
         HPX_TEST_EQ(i.get_ptr(), &b);
-        HPX_TEST_EQ(i.get_tag(), 1UL);
+        HPX_TEST_EQ(i.get_tag(), 1);
     }
 
     {
@@ -43,7 +43,7 @@ void tagged_ptr_test()
 
     {
         tagged_ptr<int> j(&a, max_tag);
-        HPX_TEST_EQ(j.get_next_tag(), 0UL);
+        HPX_TEST_EQ(j.get_next_tag(), 0);
     }
 
     {
diff --git a/libs/core/debugging/src/print.cpp b/libs/core/debugging/src/print.cpp
index 8a01d9574853..3d7cf5da2aa0 100644
--- a/libs/core/debugging/src/print.cpp
+++ b/libs/core/debugging/src/print.cpp
@@ -57,10 +57,6 @@ namespace hpx::debug {
             std::ostream&, std::int32_t const&, int);
         template HPX_CORE_EXPORT void print_dec(
             std::ostream&, std::int64_t const&, int);
-#ifdef __APPLE__
-        template HPX_CORE_EXPORT void print_dec(
-            std::ostream&, unsigned long const&, int);
-#endif
         template HPX_CORE_EXPORT void print_dec(
             std::ostream&, std::uint64_t const&, int);