Skip to content

Commit

Permalink
Add MurmurHash3 128 bit hash function (#495)
Browse files Browse the repository at this point in the history
Adds MurmurHash3 128 bit hash function.

- [x] Adds unit test (compares hash value based on
[reference_implementation
](https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp)
: [see_on_godbolt](https://godbolt.org/z/7sWKq74o6)).
- [x] updates documentation.
- [x] contributes to #480  

---------

Signed-off-by: srinivasyadav18 <[email protected]>
  • Loading branch information
srinivasyadav18 authored Jun 4, 2024
1 parent 114a780 commit 03304a3
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 5 deletions.
23 changes: 20 additions & 3 deletions benchmarks/hash_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ struct large_key {
int32_t data_[Words];
};

template <typename T>
constexpr __host__ __device__ void hash_result_aggregate(T& agg, T hash_val)
{
agg += hash_val;
}

template <>
constexpr __host__ __device__ void hash_result_aggregate(cuda::std::array<uint64_t, 2>& agg,
cuda::std::array<uint64_t, 2> hash_val)
{
agg[0] += hash_val[0];
agg[1] += hash_val[1];
}

template <int32_t BlockSize, typename Hasher, typename OutputIt>
__global__ void hash_bench_kernel(Hasher hash,
cuco::detail::index_type n,
Expand All @@ -47,12 +61,12 @@ __global__ void hash_bench_kernel(Hasher hash,
cuco::detail::index_type const gid = BlockSize * blockIdx.x + threadIdx.x;
cuco::detail::index_type const loop_stride = gridDim.x * BlockSize;
cuco::detail::index_type idx = gid;
typename Hasher::result_type agg = 0;
typename Hasher::result_type agg = {};

while (idx < n) {
typename Hasher::argument_type key(idx);
for (int32_t i = 0; i < 100; ++i) { // execute hash func 100 times
agg += hash(key);
hash_result_aggregate(agg, hash(key));
}
idx += loop_stride;
}
Expand Down Expand Up @@ -94,7 +108,10 @@ NVBENCH_BENCH_TYPES(
cuco::xxhash_64<nvbench::int64_t>,
cuco::xxhash_64<large_key<32>>,
cuco::murmurhash3_fmix_32<nvbench::int32_t>,
cuco::murmurhash3_fmix_64<nvbench::int64_t>>))
cuco::murmurhash3_fmix_64<nvbench::int64_t>,
cuco::murmurhash3_x64_128<nvbench::int32_t>,
cuco::murmurhash3_x64_128<nvbench::int64_t>,
cuco::murmurhash3_x64_128<large_key<32>>>))
.set_name("hash_function_eval")
.set_type_axes_names({"Hash"})
.set_max_noise(cuco::benchmark::defaults::MAX_NOISE);
149 changes: 149 additions & 0 deletions include/cuco/detail/hash_functions/murmurhash3.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cuco/detail/hash_functions/utils.cuh>
#include <cuco/extent.cuh>

#include <cuda/std/array>

#include <cstddef>
#include <cstdint>
#include <type_traits>
Expand Down Expand Up @@ -206,4 +208,151 @@ struct MurmurHash3_32 {
MurmurHash3_fmix32<std::uint32_t> fmix32_;
std::uint32_t seed_;
};

/**
* @brief A `MurmurHash3_x64_128` hash function to hash the given argument on host and device.
*
* MurmurHash3_x64_128 implementation from
* https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
* -----------------------------------------------------------------------------
* MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
* hereby disclaims copyright to this source code.
*
* Note - The x86 and x64 versions do _not_ produce the same results, as the algorithms are
* optimized for their respective platforms. You can still compile and run any of them on any
* platform, but your performance with the non-native version will be less than optimal.
*
* @tparam Key The type of the values to hash
*/
template <typename Key>
struct MurmurHash3_x64_128 {
using argument_type = Key; ///< The type of the values taken as argument
using result_type = cuda::std::array<std::uint64_t, 2>; ///< The type of the hash values produced

/**
* @brief Constructs a MurmurHash3_x64_128 hash function with the given `seed`.
*
* @param seed A custom number to randomize the resulting hash value
*/
__host__ __device__ constexpr MurmurHash3_x64_128(std::uint64_t seed = 0)
: fmix64_{0}, seed_{seed}
{
}

/**
* @brief Returns a hash value for its argument, as a value of type `result_type`.
*
* @param key The input argument to hash
* @return The resulting hash value for `key`
*/
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
{
return compute_hash(reinterpret_cast<std::byte const*>(&key),
cuco::extent<std::size_t, sizeof(Key)>{});
}

/**
* @brief Returns a hash value for its argument, as a value of type `result_type`.
*
* @tparam Extent The extent type
*
* @param bytes The input argument to hash
* @param size The extent of the data in bytes
* @return The resulting hash value
*/
template <typename Extent>
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
Extent size) const noexcept
{
constexpr std::uint32_t block_size = 16;
auto const nblocks = size / block_size;

std::uint64_t h1 = seed_;
std::uint64_t h2 = seed_;
constexpr std::uint64_t c1 = 0x87c37b91114253d5ull;
constexpr std::uint64_t c2 = 0x4cf5ad432745937full;
//----------
// body
for (std::remove_const_t<decltype(nblocks)> i = 0; size >= block_size && i < nblocks; i++) {
std::uint64_t k1 = load_chunk<std::uint64_t>(bytes, 2 * i);
std::uint64_t k2 = load_chunk<std::uint64_t>(bytes, 2 * i + 1);

k1 *= c1;
k1 = rotl64(k1, 31);
k1 *= c2;

h1 ^= k1;
h1 = rotl64(h1, 27);
h1 += h2;
h1 = h1 * 5 + 0x52dce729;

k2 *= c2;
k2 = rotl64(k2, 33);
k2 *= c1;

h2 ^= k2;
h2 = rotl64(h2, 31);
h2 += h1;
h2 = h2 * 5 + 0x38495ab5;
}
//----------
// tail
std::uint64_t k1 = 0;
std::uint64_t k2 = 0;
auto const tail = reinterpret_cast<uint8_t const*>(bytes) + nblocks * block_size;
switch (size & (block_size - 1)) {
case 15: k2 ^= static_cast<std::uint64_t>(tail[14]) << 48; [[fallthrough]];
case 14: k2 ^= static_cast<std::uint64_t>(tail[13]) << 40; [[fallthrough]];
case 13: k2 ^= static_cast<std::uint64_t>(tail[12]) << 32; [[fallthrough]];
case 12: k2 ^= static_cast<std::uint64_t>(tail[11]) << 24; [[fallthrough]];
case 11: k2 ^= static_cast<std::uint64_t>(tail[10]) << 16; [[fallthrough]];
case 10: k2 ^= static_cast<std::uint64_t>(tail[9]) << 8; [[fallthrough]];
case 9:
k2 ^= static_cast<std::uint64_t>(tail[8]) << 0;
k2 *= c2;
k2 = rotl64(k2, 33);
k2 *= c1;
h2 ^= k2;
[[fallthrough]];

case 8: k1 ^= static_cast<std::uint64_t>(tail[7]) << 56; [[fallthrough]];
case 7: k1 ^= static_cast<std::uint64_t>(tail[6]) << 48; [[fallthrough]];
case 6: k1 ^= static_cast<std::uint64_t>(tail[5]) << 40; [[fallthrough]];
case 5: k1 ^= static_cast<std::uint64_t>(tail[4]) << 32; [[fallthrough]];
case 4: k1 ^= static_cast<std::uint64_t>(tail[3]) << 24; [[fallthrough]];
case 3: k1 ^= static_cast<std::uint64_t>(tail[2]) << 16; [[fallthrough]];
case 2: k1 ^= static_cast<std::uint64_t>(tail[1]) << 8; [[fallthrough]];
case 1:
k1 ^= static_cast<std::uint64_t>(tail[0]) << 0;
k1 *= c1;
k1 = rotl64(k1, 31);
k1 *= c2;
h1 ^= k1;
};
//----------
// finalization
h1 ^= size;
h2 ^= size;

h1 += h2;
h2 += h1;

h1 = fmix64_(h1);
h2 = fmix64_(h2);

h1 += h2;
h2 += h1;

return {h1, h2};
}

private:
constexpr __host__ __device__ std::uint64_t rotl64(std::uint64_t x, std::int8_t r) const noexcept
{
return (x << r) | (x >> (64 - r));
}

MurmurHash3_fmix64<std::uint64_t> fmix64_;
std::uint64_t seed_;
};
} // namespace cuco::detail
8 changes: 8 additions & 0 deletions include/cuco/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ using murmurhash3_fmix_64 = detail::MurmurHash3_fmix64<Key>;
template <typename Key>
using murmurhash3_32 = detail::MurmurHash3_32<Key>;

/**
* @brief A 128-bit `MurmurHash3` hash function to hash the given argument on host and device.
*
* @tparam Key The type of the values to hash
*/
template <typename Key>
using murmurhash3_x64_128 = detail::MurmurHash3_x64_128<Key>;

/**
* @brief A 32-bit `XXH32` hash function to hash the given argument on host and device.
*
Expand Down
94 changes: 92 additions & 2 deletions tests/utility/hash_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ struct large_key {
int32_t data_[Words];
};

template <typename Hash>
template <typename Hash, typename Seed = typename Hash::result_type>
__host__ __device__ bool check_hash_result(typename Hash::argument_type const& key,
typename Hash::result_type seed,
Seed seed,
typename Hash::result_type expected) noexcept
{
Hash h(seed);
Expand Down Expand Up @@ -195,4 +195,94 @@ TEMPLATE_TEST_CASE_SIG("Static vs. dynamic key hash test",
CHECK(hash(key) ==
hash.compute_hash(reinterpret_cast<std::byte const*>(&key), sizeof(key_type)));
}
}

template <typename OutputIter>
__global__ void check_murmurhash3_128_result_kernel(OutputIter result)
{
int i = 0;

result[i++] = check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
0, 0, {14961230494313510588ull, 6383328099726337777ull});
result[i++] = check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
9, 0, {1779292183511753683ull, 16298496441448380334ull});
result[i++] = check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
42, 0, {2913627637088662735ull, 16344193523890567190ull});
result[i++] = check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
42, 42, {2248879576374326886ull, 18006515275339376488ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 2>>, uint64_t>(
{2, 2}, 0, {12221386834995143465ull, 6690950894782946573ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 3>>, uint64_t>(
{1, 4, 9}, 42, {299140022350411792ull, 9891903873182035274ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 4>>, uint64_t>(
{42, 64, 108, 1024}, 63, {4333511168876981289ull, 4659486988434316416ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 16>>, uint64_t>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
1024,
{3302412811061286680ull, 7070355726356610672ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 2>>, uint64_t>(
{2, 2}, 0, {8554944597931919519ull, 14938998000509429729ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 3>>, uint64_t>(
{1, 4, 9}, 42, {13442629947720186435ull, 7061727494178573325ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 4>>, uint64_t>(
{42, 64, 108, 1024}, 63, {8786399719555989948ull, 14954183901757012458ull});
result[i++] =
check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 16>>, uint64_t>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
1024,
{15409921801541329777ull, 10546487400963404004ull});
}

TEST_CASE("Test cuco::murmurhash3_x64_128", "")
{
// Reference hash values were computed using
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp

SECTION("Check if host-generated hash values match the reference implementation.")
{
CHECK(check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
0, 0, {14961230494313510588ull, 6383328099726337777ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
9, 0, {1779292183511753683ull, 16298496441448380334ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
42, 0, {2913627637088662735ull, 16344193523890567190ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<int32_t>, uint64_t>(
42, 42, {2248879576374326886ull, 18006515275339376488ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 2>>, uint64_t>(
{2, 2}, 0, {12221386834995143465ull, 6690950894782946573ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 3>>, uint64_t>(
{1, 4, 9}, 42, {299140022350411792ull, 9891903873182035274ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 4>>, uint64_t>(
{42, 64, 108, 1024}, 63, {4333511168876981289ull, 4659486988434316416ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int32_t, 16>>, uint64_t>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
1024,
{3302412811061286680ull, 7070355726356610672ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 2>>, uint64_t>(
{2, 2}, 0, {8554944597931919519ull, 14938998000509429729ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 3>>, uint64_t>(
{1, 4, 9}, 42, {13442629947720186435ull, 7061727494178573325ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 4>>, uint64_t>(
{42, 64, 108, 1024}, 63, {8786399719555989948ull, 14954183901757012458ull}));
CHECK(check_hash_result<cuco::murmurhash3_x64_128<cuda::std::array<int64_t, 16>>, uint64_t>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
1024,
{15409921801541329777ull, 10546487400963404004ull}));
}

SECTION("Check if device-generated hash values match the reference implementation.")
{
thrust::device_vector<bool> result(12, true);

check_murmurhash3_128_result_kernel<<<1, 1>>>(result.begin());

CHECK(cuco::test::all_of(result.begin(), result.end(), [] __device__(bool v) { return v; }));
}
}

0 comments on commit 03304a3

Please sign in to comment.