From ee9c48abcdc7188df4833f9b391f6e84d798000d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 29 Sep 2023 12:33:09 -0700 Subject: [PATCH] Add constructor overloads taking load factor as input (#369) This PR adds constructor overloads that take a size and load factor for the new map and set. --- include/cuco/detail/open_addressing_impl.cuh | 59 ++++++- include/cuco/detail/static_map/static_map.inl | 29 ++++ include/cuco/detail/static_set/static_set.inl | 26 +++ include/cuco/static_map.cuh | 46 ++++- include/cuco/static_set.cuh | 44 ++++- tests/CMakeLists.txt | 1 + tests/static_map/capacity_test.cu | 162 ++++++++++++++++++ tests/static_set/capacity_test.cu | 30 ++++ 8 files changed, 387 insertions(+), 10 deletions(-) create mode 100644 tests/static_map/capacity_test.cu diff --git a/include/cuco/detail/open_addressing_impl.cuh b/include/cuco/detail/open_addressing_impl.cuh index 2bc3a7225..556f821d4 100644 --- a/include/cuco/detail/open_addressing_impl.cuh +++ b/include/cuco/detail/open_addressing_impl.cuh @@ -34,6 +34,8 @@ #include +#include + namespace cuco { namespace experimental { namespace detail { @@ -120,8 +122,8 @@ class open_addressing_impl { * @param stream CUDA stream used to initialize the data structure */ constexpr open_addressing_impl(Extent capacity, - key_type empty_key_sentinel, - value_type empty_slot_sentinel, + Key empty_key_sentinel, + Value empty_slot_sentinel, KeyEqual const& pred, ProbingScheme const& probing_scheme, Allocator const& alloc, @@ -135,6 +137,59 @@ class open_addressing_impl { this->clear_async(stream); } + /** + * @brief Constructs a statically-sized open addressing data structure with the number of elements + * to insert `n`, the desired load factor, etc. + * + * @note This constructor helps users create a data structure based on the number of elements to + * insert and the desired load factor without manually computing the desired capacity. The actual + * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by + * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG + * size, and the window size and is computed via the `make_window_extent` factory. + * @note Insert operations will not automatically grow the container. + * @note Attempting to insert more unique keys than the capacity of the container results in + * undefined behavior. + * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert + * this sentinel value. + * @note This constructor doesn't synchronize the given stream. + * @note This overload will convert compile-time extents to runtime constants which might lead to + * performance regressions. + * + * @throw If the desired occupancy is no bigger than zero + * @throw If the desired occupancy is no smaller than one + * + * @param n The number of elements to insert + * @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50% + * load factor + * @param empty_key_sentinel The reserved key value for empty slots + * @param empty_slot_sentinel The reserved slot value for empty slots + * @param pred Key equality binary predicate + * @param probing_scheme Probing scheme + * @param alloc Allocator used for allocating device storage + * @param stream CUDA stream used to initialize the data structure + */ + constexpr open_addressing_impl(Extent n, + double desired_load_factor, + Key empty_key_sentinel, + Value empty_slot_sentinel, + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + Allocator const& alloc, + cuda_stream_ref stream) + : empty_key_sentinel_{empty_key_sentinel}, + empty_slot_sentinel_{empty_slot_sentinel}, + predicate_{pred}, + probing_scheme_{probing_scheme}, + storage_{make_window_extent( + static_cast(std::ceil(static_cast(n) / desired_load_factor))), + alloc} + { + CUCO_EXPECTS(desired_load_factor > 0., "Desired occupancy must be larger than zero"); + CUCO_EXPECTS(desired_load_factor < 1., "Desired occupancy must be smaller than one"); + + this->clear_async(stream); + } + /** * @brief Erases all elements from the container. After this call, `size()` returns zero. * Invalidates any references, pointers, or iterators referring to contained elements. diff --git a/include/cuco/detail/static_map/static_map.inl b/include/cuco/detail/static_map/static_map.inl index d7274245e..1cc932aeb 100644 --- a/include/cuco/detail/static_map/static_map.inl +++ b/include/cuco/detail/static_map/static_map.inl @@ -54,6 +54,35 @@ constexpr static_map +constexpr static_map:: + static_map(Extent n, + double desired_load_factor, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + Allocator const& alloc, + cuda_stream_ref stream) + : impl_{std::make_unique(n, + desired_load_factor, + empty_key_sentinel, + cuco::pair{empty_key_sentinel, empty_value_sentinel}, + pred, + probing_scheme, + alloc, + stream)}, + empty_value_sentinel_{empty_value_sentinel} +{ +} + template +constexpr static_set::static_set( + Extent n, + double desired_load_factor, + empty_key empty_key_sentinel, + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + Allocator const& alloc, + cuda_stream_ref stream) + : impl_{std::make_unique(n, + desired_load_factor, + empty_key_sentinel, + empty_key_sentinel, + pred, + probing_scheme, + alloc, + stream)} +{ +} + template , @@ -156,7 +155,7 @@ class static_map { /** * @brief Constructs a statically-sized map with the specified initial capacity, sentinel values - * and CUDA stream. + * and CUDA stream * * The actual map capacity depends on the given `capacity`, the probing scheme, CG size, and the * window size and it is computed via the `make_window_extent` factory. Insert operations will not @@ -165,8 +164,7 @@ class static_map { * * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert * this sentinel value. - * @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the - * stream before the object is first used. + * @note This constructor doesn't synchronize the given stream. * * @param capacity The requested lower-bound map size * @param empty_key_sentinel The reserved key value for empty slots @@ -184,6 +182,46 @@ class static_map { Allocator const& alloc = {}, cuda_stream_ref stream = {}); + /** + * @brief Constructs a statically-sized map with the number of elements to insert `n`, the desired + * load factor, etc + * + * @note This constructor helps users create a map based on the number of elements to insert and + * the desired load factor without manually computing the desired capacity. The actual map + * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by + * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG + * size, and the window size and is computed via the `make_window_extent` factory. + * @note Insert operations will not automatically grow the container. + * @note Attempting to insert more unique keys than the capacity of the container results in + * undefined behavior. + * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert + * this sentinel value. + * @note This constructor doesn't synchronize the given stream. + * @note This overload will convert compile-time extents to runtime constants which might lead to + * performance regressions. + * + * @throw If the desired occupancy is no bigger than zero + * @throw If the desired occupancy is no smaller than one + * + * @param n The number of elements to insert + * @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50% + * load factor + * @param empty_key_sentinel The reserved key value for empty slots + * @param empty_value_sentinel The reserved mapped value for empty slots + * @param pred Key equality binary predicate + * @param probing_scheme Probing scheme + * @param alloc Allocator used for allocating device storage + * @param stream CUDA stream used to initialize the map + */ + constexpr static_map(Extent n, + double desired_load_factor, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + KeyEqual const& pred = {}, + ProbingScheme const& probing_scheme = {}, + Allocator const& alloc = {}, + cuda_stream_ref stream = {}); + /** * @brief Erases all elements from the container. After this call, `size()` returns zero. * Invalidates any references, pointers, or iterators referring to contained elements. diff --git a/include/cuco/static_set.cuh b/include/cuco/static_set.cuh index 613a99bd4..6d48d5dc8 100644 --- a/include/cuco/static_set.cuh +++ b/include/cuco/static_set.cuh @@ -79,7 +79,6 @@ namespace experimental { * @tparam Allocator Type of allocator used for device storage * @tparam Storage Slot window storage type */ - template , cuda::thread_scope Scope = cuda::thread_scope_device, @@ -131,7 +130,7 @@ class static_set { /** * @brief Constructs a statically-sized set with the specified initial capacity, sentinel values - * and CUDA stream. + * and CUDA stream * * The actual set capacity depends on the given `capacity`, the probing scheme, CG size, and the * window size and it is computed via the `make_window_extent` factory. Insert operations will not @@ -140,8 +139,7 @@ class static_set { * * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert * this sentinel value. - * @note If a non-default CUDA stream is provided, the caller is responsible for synchronizing the - * stream before the object is first used. + * @note This constructor doesn't synchronize the given stream. * * @param capacity The requested lower-bound set size * @param empty_key_sentinel The reserved key value for empty slots @@ -157,6 +155,44 @@ class static_set { Allocator const& alloc = {}, cuda_stream_ref stream = {}); + /** + * @brief Constructs a statically-sized map with the number of elements to insert `n`, the desired + * load factor, etc + * + * @note This constructor helps users create a set based on the number of elements to insert and + * the desired load factor without manually computing the desired capacity. The actual set + * capacity will be a size no smaller than `ceil(n / desired_load_factor)`. It's determined by + * multiple factors including the given `n`, the desired load factor, the probing scheme, the CG + * size, and the window size and is computed via the `make_window_extent` factory. + * @note Insert operations will not automatically grow the container. + * @note Attempting to insert more unique keys than the capacity of the container results in + * undefined behavior. + * @note Any `*_sentinel`s are reserved and behavior is undefined when attempting to insert + * this sentinel value. + * @note This constructor doesn't synchronize the given stream. + * @note This overload will convert compile-time extents to runtime constants which might lead to + * performance regressions. + * + * @throw If the desired occupancy is no bigger than zero + * @throw If the desired occupancy is no smaller than one + * + * @param n The number of elements to insert + * @param desired_load_factor The desired load factor of the container, e.g., 0.5 implies a 50% + * load factor + * @param empty_key_sentinel The reserved key value for empty slots + * @param pred Key equality binary predicate + * @param probing_scheme Probing scheme + * @param alloc Allocator used for allocating device storage + * @param stream CUDA stream used to initialize the set + */ + constexpr static_set(Extent n, + double desired_load_factor, + empty_key empty_key_sentinel, + KeyEqual const& pred = {}, + ProbingScheme const& probing_scheme = {}, + Allocator const& alloc = {}, + cuda_stream_ref stream = {}); + /** * @brief Erases all elements from the container. After this call, `size()` returns zero. * Invalidates any references, pointers, or iterators referring to contained elements. diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3deeeddf1..775b5b82f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,6 +69,7 @@ ConfigureTest(STATIC_SET_TEST ################################################################################################### # - static_map tests ------------------------------------------------------------------------------ ConfigureTest(STATIC_MAP_TEST + static_map/capacity_test.cu static_map/custom_type_test.cu static_map/duplicate_keys_test.cu static_map/erase_test.cu diff --git a/tests/static_map/capacity_test.cu b/tests/static_map/capacity_test.cu new file mode 100644 index 000000000..13774fe8a --- /dev/null +++ b/tests/static_map/capacity_test.cu @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +TEST_CASE("Static map capacity", "") +{ + using Key = int32_t; + using T = int32_t; + using ProbeT = cuco::experimental::double_hashing<1, cuco::default_hash_function>; + using Equal = thrust::equal_to; + using AllocatorT = cuco::cuda_allocator; + using StorageT = cuco::experimental::storage<2>; + + SECTION("zero capacity is allowed.") + { + auto constexpr gold_capacity = 4; + + using extent_type = cuco::experimental::extent; + cuco::experimental::static_map + map{extent_type{}, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + SECTION("negative capacity (ikr -_-||) is also allowed.") + { + auto constexpr gold_capacity = 4; + + using extent_type = cuco::experimental::extent; + cuco::experimental::static_map + map{extent_type{-10}, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + constexpr std::size_t num_keys{400}; + + SECTION("Dynamic extent is evaluated at run time.") + { + auto constexpr gold_capacity = 422; // 211 x 2 + + using extent_type = cuco::experimental::extent; + cuco::experimental::static_map + map{num_keys, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + SECTION("map can be constructed from plain integer.") + { + auto constexpr gold_capacity = 422; // 211 x 2 + + cuco::experimental::static_map + map{num_keys, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + SECTION("map can be constructed from plain integer and load factor.") + { + auto constexpr gold_capacity = 502; // 251 x 2 + + cuco::experimental::static_map + map{num_keys, 0.8, cuco::empty_key{-1}, cuco::empty_value{-1}}; + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + SECTION("Dynamic extent is evaluated at run time.") + { + auto constexpr gold_capacity = 412; // 103 x 2 x 2 + + using probe = cuco::experimental::linear_probing<2, cuco::default_hash_function>; + auto map = cuco::experimental::static_map, + cuda::thread_scope_device, + Equal, + probe, + AllocatorT, + StorageT>{ + num_keys, cuco::empty_key{-1}, cuco::empty_value{-1}}; + + auto const capacity = map.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = map.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } +} diff --git a/tests/static_set/capacity_test.cu b/tests/static_set/capacity_test.cu index 4c66a7ccc..f042cdb73 100644 --- a/tests/static_set/capacity_test.cu +++ b/tests/static_set/capacity_test.cu @@ -76,6 +76,36 @@ TEST_CASE("Static set capacity", "") REQUIRE(ref_capacity == gold_capacity); } + SECTION("Set can be constructed from plain integer.") + { + auto constexpr gold_capacity = 422; // 211 x 2 + + cuco::experimental:: + static_set + set{num_keys, cuco::empty_key{-1}}; + auto const capacity = set.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = set.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + + SECTION("Set can be constructed from plain integer and load factor.") + { + auto constexpr gold_capacity = 502; // 251 x 2 + + cuco::experimental:: + static_set + set{num_keys, 0.8, cuco::empty_key{-1}}; + auto const capacity = set.capacity(); + REQUIRE(capacity == gold_capacity); + + auto ref = set.ref(cuco::experimental::insert); + auto const ref_capacity = ref.capacity(); + REQUIRE(ref_capacity == gold_capacity); + } + SECTION("Dynamic extent is evaluated at run time.") { auto constexpr gold_capacity = 412; // 103 x 2 x 2