From a78629d22257c1cb9b09dff4a468e408d8dcc85a Mon Sep 17 00:00:00 2001 From: Aman Atman <41161981+willtryagain@users.noreply.github.com> Date: Thu, 5 Dec 2024 19:22:44 +0530 Subject: [PATCH 1/2] replace int_div_ceil with cuda::std::ceil --- .../static_map/insert_or_apply_bench.cu | 5 +- include/cuco/detail/extent/extent.inl | 8 ++-- include/cuco/detail/prime.hpp | 5 +- include/cuco/detail/utility/cuda.hpp | 6 ++- include/cuco/detail/utility/math.cuh | 46 ------------------- 5 files changed, 16 insertions(+), 54 deletions(-) delete mode 100644 include/cuco/detail/utility/math.cuh diff --git a/benchmarks/static_map/insert_or_apply_bench.cu b/benchmarks/static_map/insert_or_apply_bench.cu index 4633a8b0f..60cd91caf 100644 --- a/benchmarks/static_map/insert_or_apply_bench.cu +++ b/benchmarks/static_map/insert_or_apply_bench.cu @@ -21,6 +21,9 @@ #include #include +#include + + #include #include @@ -42,7 +45,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_insert_or_appl auto const occupancy = state.get_float64("Occupancy"); auto const multiplicity = state.get_int64("Multiplicity"); - std::size_t const size = cuco::detail::int_div_ceil(num_keys, multiplicity) / occupancy; + std::size_t const size = static_cast(cuda::std::ceil(static_cast(num_keys) / static_cast(multiplicity))) / occupancy; thrust::device_vector keys(num_keys); diff --git a/include/cuco/detail/extent/extent.inl b/include/cuco/detail/extent/extent.inl index 916d75e2c..88261fd4b 100644 --- a/include/cuco/detail/extent/extent.inl +++ b/include/cuco/detail/extent/extent.inl @@ -18,7 +18,6 @@ #include #include // TODO move to detail/extent/ -#include #include #include #include @@ -26,6 +25,9 @@ #include +#include + + namespace cuco { template @@ -91,8 +93,8 @@ template (static_cast(std::numeric_limits::max()) < max_prime) ? 
std::numeric_limits::max() : static_cast(max_prime); - auto const size = cuco::detail::int_div_ceil( - std::max(static_cast(ext), static_cast(1)), CGSize * BucketSize); + auto const size = static_cast(cuda::std::ceil(static_cast( + std::max(static_cast(ext), static_cast(1))) / static_cast(CGSize * BucketSize))); if (size > max_value) { CUCO_FAIL("Invalid input extent"); } if constexpr (N == dynamic_extent) { diff --git a/include/cuco/detail/prime.hpp b/include/cuco/detail/prime.hpp index 4ef0a35e3..f28db0e8d 100644 --- a/include/cuco/detail/prime.hpp +++ b/include/cuco/detail/prime.hpp @@ -16,12 +16,13 @@ #pragma once -#include #include #include #include #include +#include + namespace cuco { namespace detail { @@ -20154,7 +20155,7 @@ constexpr T get_valid_capacity(T capacity) noexcept if constexpr (not uses_vector_load) { return cg_size; } }(); - auto const c = int_div_ceil(capacity, stride); + auto const c = static_cast(cuda::std::ceil(static_cast(capacity) / static_cast(stride))); auto const min_prime = std::lower_bound(primes.begin(), primes.end(), c); return *min_prime * stride; } diff --git a/include/cuco/detail/utility/cuda.hpp b/include/cuco/detail/utility/cuda.hpp index 49445e02f..37f35cfdb 100644 --- a/include/cuco/detail/utility/cuda.hpp +++ b/include/cuco/detail/utility/cuda.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include namespace cuco { namespace detail { @@ -43,7 +43,9 @@ constexpr auto grid_size(index_type num, int32_t stride = default_stride(), int32_t block_size = default_block_size()) noexcept { - return int_div_ceil(cg_size * num, stride * block_size); + return static_cast( + cuda::std::ceil(static_cast(cg_size * num) / static_cast (stride * block_size)) + ); } /** diff --git a/include/cuco/detail/utility/math.cuh b/include/cuco/detail/utility/math.cuh deleted file mode 100644 index c2715f6fa..000000000 --- a/include/cuco/detail/utility/math.cuh +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - */ - -#pragma once - -#include - -namespace cuco { -namespace detail { - -/** - * @brief Ceiling of an integer division - * - * @tparam T Type of dividend - * @tparam U Type of divisor - * - * @throw If `T` is not an integral type - * @throw If `U` is not an integral type - * - * @param dividend Numerator - * @param divisor Denominator - * - * @return Ceiling of the integer division - */ -template -__host__ __device__ constexpr T int_div_ceil(T dividend, U divisor) noexcept -{ - static_assert(cuda::std::is_integral_v); - static_assert(cuda::std::is_integral_v); - return (dividend + divisor - 1) / divisor; -} - -} // namespace detail -} // namespace cuco From 94c324bcbe03a85c32a6b09c273a22167b0d2b17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 14:28:37 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto code formatting --- benchmarks/static_map/insert_or_apply_bench.cu | 8 ++++---- include/cuco/detail/extent/extent.inl | 9 ++++----- include/cuco/detail/prime.hpp | 6 +++--- include/cuco/detail/utility/cuda.hpp | 4 ++-- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/benchmarks/static_map/insert_or_apply_bench.cu b/benchmarks/static_map/insert_or_apply_bench.cu index 60cd91caf..ad668b6ac 100644 --- a/benchmarks/static_map/insert_or_apply_bench.cu +++ b/benchmarks/static_map/insert_or_apply_bench.cu @@ -21,11 +21,9 @@ #include #include -#include - - 
#include +#include #include #include @@ -45,7 +43,9 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_insert_or_appl auto const occupancy = state.get_float64("Occupancy"); auto const multiplicity = state.get_int64("Multiplicity"); - std::size_t const size = static_cast(cuda::std::ceil(static_cast(num_keys) / static_cast(multiplicity))) / occupancy; + std::size_t const size = static_cast(cuda::std::ceil(static_cast(num_keys) / + static_cast(multiplicity))) / + occupancy; thrust::device_vector keys(num_keys); diff --git a/include/cuco/detail/extent/extent.inl b/include/cuco/detail/extent/extent.inl index 88261fd4b..6ef69a705 100644 --- a/include/cuco/detail/extent/extent.inl +++ b/include/cuco/detail/extent/extent.inl @@ -23,10 +23,8 @@ #include #include -#include - #include - +#include namespace cuco { @@ -93,8 +91,9 @@ template (static_cast(std::numeric_limits::max()) < max_prime) ? std::numeric_limits::max() : static_cast(max_prime); - auto const size = static_cast(cuda::std::ceil(static_cast( - std::max(static_cast(ext), static_cast(1))) / static_cast(CGSize * BucketSize))); + auto const size = static_cast(cuda::std::ceil( + static_cast(std::max(static_cast(ext), static_cast(1))) / + static_cast(CGSize * BucketSize))); if (size > max_value) { CUCO_FAIL("Invalid input extent"); } if constexpr (N == dynamic_extent) { diff --git a/include/cuco/detail/prime.hpp b/include/cuco/detail/prime.hpp index f28db0e8d..26e876ab3 100644 --- a/include/cuco/detail/prime.hpp +++ b/include/cuco/detail/prime.hpp @@ -16,13 +16,12 @@ #pragma once +#include #include #include #include #include -#include - namespace cuco { namespace detail { @@ -20155,7 +20154,8 @@ constexpr T get_valid_capacity(T capacity) noexcept if constexpr (not uses_vector_load) { return cg_size; } }(); - auto const c = static_cast(cuda::std::ceil(static_cast(capacity) / static_cast(stride))); + auto const c = + static_cast(cuda::std::ceil(static_cast(capacity) / static_cast(stride))); auto const 
min_prime = std::lower_bound(primes.begin(), primes.end(), c); return *min_prime * stride; } diff --git a/include/cuco/detail/utility/cuda.hpp b/include/cuco/detail/utility/cuda.hpp index 37f35cfdb..7cde2d572 100644 --- a/include/cuco/detail/utility/cuda.hpp +++ b/include/cuco/detail/utility/cuda.hpp @@ -16,6 +16,7 @@ #pragma once #include + #include namespace cuco { @@ -44,8 +45,7 @@ constexpr auto grid_size(index_type num, int32_t block_size = default_block_size()) noexcept { return static_cast( - cuda::std::ceil(static_cast(cg_size * num) / static_cast (stride * block_size)) - ); + cuda::std::ceil(static_cast(cg_size * num) / static_cast(stride * block_size))); } /**