Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 194 additions & 194 deletions include/boost/decimal/decimal64_t.hpp

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions include/boost/decimal/detail/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,83 +25,83 @@ namespace detail {
namespace impl {

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto storage_width_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto storage_width_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 32 :
decimal_val_v<DecimalType> < 128 ? 64 : 128;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto precision_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto precision_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 7 :
decimal_val_v<DecimalType> < 128 ? 16 : 34;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto bias_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto bias_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 101 :
decimal_val_v<DecimalType> < 128 ? 398 : 6176;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto max_biased_exp_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto max_biased_exp_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 191 :
decimal_val_v<DecimalType> < 128 ? 767 : 12287;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto emax_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto emax_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 96 :
decimal_val_v<DecimalType> < 128 ? 384 : 6144;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto emin_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto emin_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? -95 :
decimal_val_v<DecimalType> < 128 ? -383 : -6143;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto combination_field_width_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto combination_field_width_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 11 :
decimal_val_v<DecimalType> < 128 ? 13 : 17;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto trailing_significand_field_width_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto trailing_significand_field_width_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 20 :
decimal_val_v<DecimalType> < 128 ? 50 : 110;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType, std::enable_if_t<decimal_val_v<DecimalType> < 128, bool> = true>
constexpr auto max_significand_v() noexcept
BOOST_DECIMAL_CUDA_CONSTEXPR auto max_significand_v() noexcept
{
return decimal_val_v<DecimalType> < 64 ? 9'999'999 : 9'999'999'999'999'999;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType, std::enable_if_t<decimal_val_v<DecimalType> >= 128, bool> = true>
constexpr auto max_significand_v() noexcept
BOOST_DECIMAL_CUDA_CONSTEXPR auto max_significand_v() noexcept
{
// 34x 9s
return BOOST_DECIMAL_DETAIL_INT128_UINT128_C(9999999999999999999999999999999999);
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto max_string_length_v() noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto max_string_length_v() noexcept -> int
{
return decimal_val_v<DecimalType> < 64 ? 15 :
decimal_val_v<DecimalType> < 128 ? 25 : 41;
}

template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE DecimalType>
constexpr auto is_fast_type_v() noexcept -> bool
BOOST_DECIMAL_CUDA_CONSTEXPR auto is_fast_type_v() noexcept -> bool
{
// The fast types all assign 1 additional bit over the regular types
return decimal_val_v<DecimalType> % 2 == 1;
Expand Down
14 changes: 7 additions & 7 deletions include/boost/decimal/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
# define BOOST_DECIMAL_DEC_EVAL_METHOD 0
#endif

// Fundamental NVCC options
#if defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA)
// Fundamental CUDACC options
#if defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA)
# define BOOST_DECIMAL_HOST_DEVICE __host__ __device__
# define BOOST_DECIMAL_CUDA_CONSTEXPR __host__ __device__ constexpr
# define BOOST_DECIMAL_DETAIL_INT128_ENABLE_CUDA
Expand Down Expand Up @@ -97,7 +97,7 @@
# endif
#endif

#if defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA)
#if defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA)

// Include intrinsics if available
#if defined(_MSC_VER)
Expand Down Expand Up @@ -147,7 +147,7 @@
#endif

// Use 128-bit integers and suppress warnings for using extensions
#if (defined(BOOST_HAS_INT128) || (defined(__SIZEOF_INT128__) && !defined(_MSC_VER))) && !(defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
#if (defined(BOOST_HAS_INT128) || (defined(__SIZEOF_INT128__) && !defined(_MSC_VER))) && !(defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))

namespace boost { namespace decimal { namespace detail {

Expand All @@ -170,7 +170,7 @@ typedef unsigned __int128 builtin_uint128_t;
#endif

// 128-bit floats
#if (defined(BOOST_HAS_FLOAT128) || defined(__SIZEOF_FLOAT128__)) && !(defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
#if (defined(BOOST_HAS_FLOAT128) || defined(__SIZEOF_FLOAT128__)) && !(defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
# define BOOST_DECIMAL_HAS_FLOAT128
#endif

Expand Down Expand Up @@ -283,9 +283,9 @@ typedef unsigned __int128 builtin_uint128_t;
# define BOOST_DECIMAL_HAS_BUILTIN_IS_CONSTANT_EVALUATED
#endif

#if defined(BOOST_DECIMAL_HAS_IS_CONSTANT_EVALUATED) && !(defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
#if defined(BOOST_DECIMAL_HAS_IS_CONSTANT_EVALUATED) && !(defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
# define BOOST_DECIMAL_IS_CONSTANT_EVALUATED(x) std::is_constant_evaluated()
#elif defined(BOOST_DECIMAL_HAS_BUILTIN_IS_CONSTANT_EVALUATED) && !(defined(__NVCC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
#elif defined(BOOST_DECIMAL_HAS_BUILTIN_IS_CONSTANT_EVALUATED) && !(defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))
# define BOOST_DECIMAL_IS_CONSTANT_EVALUATED(x) __builtin_is_constant_evaluated()
#else
# define BOOST_DECIMAL_IS_CONSTANT_EVALUATED(x) false
Expand Down
2 changes: 1 addition & 1 deletion include/boost/decimal/detail/div_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_CUDA_CONSTEXPR auto generic_div_impl(co
}

template <typename DecimalType, typename T>
BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_generic_div_impl(const T& lhs, const T& rhs, const bool sign) noexcept -> DecimalType
BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_CUDA_CONSTEXPR auto d64_generic_div_impl(const T& lhs, const T& rhs, const bool sign) noexcept -> DecimalType
{
using unsigned_int128_type = boost::int128::uint128_t;

Expand Down
10 changes: 6 additions & 4 deletions include/boost/decimal/detail/fenv_rounding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto divmod(T dividend, T divisor) noexcept -> divm
return {q, r};
}

#ifdef BOOST_DECIMAL_DETAIL_INT128_HAS_INT128
#if defined(BOOST_DECIMAL_DETAIL_INT128_HAS_INT128) && !(defined(__CUDACC__) && defined(BOOST_DECIMAL_ENABLE_CUDA))

constexpr auto divmod(const int128::uint128_t dividend, const int128::uint128_t divisor) -> divmod_result<int128::uint128_t>
{
Expand Down Expand Up @@ -101,7 +101,7 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto divmod10(const int128::uint128_t lhs) noexcept
}

template <typename TargetType, typename T>
BOOST_DECIMAL_CUDA_CONSTEXPR auto fenv_round_impl(T& val, const bool is_neg, const bool sticky, const rounding_mode round = _boost_decimal_global_rounding_mode) noexcept -> int
BOOST_DECIMAL_CUDA_CONSTEXPR auto fenv_round_impl(T& val, const bool is_neg, const bool sticky, const rounding_mode round) noexcept -> int
{
using significand_type = std::conditional_t<decimal_val_v<TargetType> >= 128, int128::uint128_t, std::int64_t>;

Expand Down Expand Up @@ -166,7 +166,8 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto fenv_round_impl(T& val, const bool is_neg, con
template <typename TargetType, typename T, std::enable_if_t<is_integral_v<T>, bool> = true>
BOOST_DECIMAL_CUDA_CONSTEXPR auto fenv_round(T& val, bool is_neg = false, bool sticky = false) noexcept -> int
{
return impl::fenv_round_impl<TargetType>(val, is_neg, sticky);
constexpr auto round {_boost_decimal_global_rounding_mode};
return impl::fenv_round_impl<TargetType>(val, is_neg, sticky, round);
}

#else
Expand All @@ -176,7 +177,8 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto fenv_round(T& val, bool is_neg = false, bool s
{
if (BOOST_DECIMAL_IS_CONSTANT_EVALUATED(coeff))
{
return impl::fenv_round_impl<TargetType>(val, is_neg, sticky);
constexpr auto round {_boost_decimal_global_rounding_mode};
return impl::fenv_round_impl<TargetType>(val, is_neg, sticky, round);
}
else
{
Expand Down
11 changes: 6 additions & 5 deletions include/boost/decimal/detail/int128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
#ifndef BOOST_DECIMAL_DETAIL_INT128_HPP
#define BOOST_DECIMAL_DETAIL_INT128_HPP

#include "int128/int128.hpp"
#include "int128/bit.hpp"
#include "int128/iostream.hpp"
#include "int128/literals.hpp"
#include "int128/numeric.hpp"
#include <boost/decimal/detail/config.hpp>
#include <boost/decimal/detail/int128/int128.hpp>
#include <boost/decimal/detail/int128/bit.hpp>
#include <boost/decimal/detail/int128/iostream.hpp>
#include <boost/decimal/detail/int128/literals.hpp>
#include <boost/decimal/detail/int128/numeric.hpp>

#endif // BOOST_DECIMAL_DETAIL_INT128_HPP
6 changes: 3 additions & 3 deletions include/boost/decimal/detail/mul_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_CUDA_CONSTEXPR auto mul_impl(T lhs_sig,

// In the fast case we are better served doing our 128-bit division here since we are at a known starting point
template <typename ReturnType, typename T>
constexpr auto d64_mul_impl(const T& lhs, const T& rhs) noexcept -> ReturnType
BOOST_DECIMAL_CUDA_CONSTEXPR auto d64_mul_impl(const T& lhs, const T& rhs) noexcept -> ReturnType
{
using unsigned_int128_type = boost::int128::uint128_t;

Expand All @@ -85,7 +85,7 @@ constexpr auto d64_mul_impl(const T& lhs, const T& rhs) noexcept -> ReturnType
}

template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_CUDA_CONSTEXPR auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
-> std::enable_if_t<detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
{
Expand All @@ -108,7 +108,7 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, boo
}

template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
BOOST_DECIMAL_FORCE_INLINE BOOST_DECIMAL_CUDA_CONSTEXPR auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
-> std::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
{
Expand Down
2 changes: 1 addition & 1 deletion include/boost/decimal/detail/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace decimal {
namespace detail {

template <typename T>
constexpr auto swap(T& x, T& y) noexcept -> void
BOOST_DECIMAL_CUDA_CONSTEXPR auto swap(T& x, T& y) noexcept -> void
{
const T temp {x};
x = y;
Expand Down
6 changes: 6 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ run test_cuda_decimal32_t_add.cu ;
run test_cuda_decimal32_t_sub.cu ;
run test_cuda_decimal32_t_mul.cu ;
run test_cuda_decimal32_t_div.cu ;

run test_cuda_decimal64_t_construct.cu ;
run test_cuda_decimal64_t_add.cu ;
run test_cuda_decimal64_t_sub.cu ;
run test_cuda_decimal64_t_mul.cu ;
run test_cuda_decimal64_t_div.cu ;
77 changes: 77 additions & 0 deletions test/test_cuda_decimal64_t_add.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <vector>
#include <random>
#include <boost/decimal/decimal64_t.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#include <cuda_runtime.h>

using test_type = boost::decimal::decimal64_t;

// Element-wise self-addition kernel: out[k] = in[k] + in[k].
//
// Launch layout: 1D grid of 1D blocks; only the x dimensions are used to form
// the global index, and each thread handles at most one element. Threads whose
// index falls at or beyond numElements do nothing, so the grid may overshoot.
// Both pointers are dereferenced in device code and must be device-accessible.
__global__ void cuda_test(const test_type *in, test_type *out, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Tail guard: the last block may extend past the end of the data
    if (idx >= numElements)
    {
        return;
    }

    out[idx] = in[idx] + in[idx];
}

// Host driver for the decimal64_t addition test.
//
// Fills an input buffer with pseudo-random integers in [1, 4999] converted to
// test_type (fixed seed, so runs are reproducible), launches cuda_test over the
// buffer, then recomputes in[i] + in[i] on the host and requires every device
// result to compare equal to the host result.
//
// Returns EXIT_FAILURE if any CUDA runtime call or the kernel launch reports an
// error, or if any element mismatches; returns 0 otherwise.
//
// NOTE(review): cuda_managed_ptr is presumably backed by unified (managed)
// memory, since the same pointer is used from host and device - confirm
// against cuda_managed_ptr.hpp.
int main(void)
{
    // Reports a failed CUDA runtime call with its error string.
    // Returns true when the call succeeded.
    const auto cuda_ok = [](const cudaError_t status, const char* what) -> bool
    {
        if (status != cudaSuccess)
        {
            std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
            return false;
        }

        return true;
    };

    std::mt19937_64 rng{42};

    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    cuda_managed_ptr<test_type> input_vector(numElements);
    cuda_managed_ptr<test_type> output_vector(numElements);

    std::uniform_int_distribution<int> dist{1, 4999};
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = test_type(dist(rng));
    }

    // Ceil-division so a partial final block still covers the tail elements
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    if (!cuda_ok(cudaDeviceSetLimit(cudaLimitStackSize, 2048), "cudaDeviceSetLimit"))
    {
        return EXIT_FAILURE;
    }

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    // A kernel launch returns no status itself; configuration errors are only
    // visible through cudaGetLastError()
    if (!cuda_ok(cudaGetLastError(), "kernel launch"))
    {
        return EXIT_FAILURE;
    }

    // Faults raised while the kernel executes surface at the next synchronizing
    // call. The sync is also required before the host reads output_vector.
    if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
    {
        return EXIT_FAILURE;
    }

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Host reference computation, timed separately for comparison
    std::vector<test_type> results;
    results.reserve(numElements);
    w.reset();
    for (int i = 0; i < numElements; ++i)
    {
        results.push_back(input_vector[i] + input_vector[i]);
    }
    double t = w.elapsed();

    for (int i = 0; i < numElements; ++i)
    {
        if (output_vector[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
Loading
Loading