From 8638606d8c98f2d18926d3a9c11ead0ae0022130 Mon Sep 17 00:00:00 2001 From: Leonid Evdokimov Date: Thu, 26 Sep 2024 18:14:56 +0300 Subject: [PATCH] WIP2 --- Hashes.cpp | 82 +++++++++++++++++++++++------------------------------- Hashes.h | 3 ++ main.cpp | 4 +-- 3 files changed, 40 insertions(+), 49 deletions(-) diff --git a/Hashes.cpp b/Hashes.cpp index 42424513..0d1727ad 100644 --- a/Hashes.cpp +++ b/Hashes.cpp @@ -1277,8 +1277,6 @@ void crc32c_pclmul_test(const void *key, int len, uint32_t seed, void *out) } #endif -#define __sun 42 // to define NMH_RESTRICT - #include "hash-garage/nmhash.h" #define NMHASH32_DESC_STR "NMHASH_LITTLE_ENDIAN:" MACRO_ITOA(NMHASH_LITTLE_ENDIAN) ", " \ "NMH_VECTOR:" MACRO_ITOA(NMH_VECTOR) ", " \ @@ -1287,55 +1285,37 @@ void crc32c_pclmul_test(const void *key, int len, uint32_t seed, void *out) const char * const nmhash32_ver("nmhash32, " NMHASH32_DESC_STR); const char * const nmhash32x_ver("nmhash32x, " NMHASH32_DESC_STR); -constexpr unsigned sizeof_NMH_ACC_INIT = sizeof(NMH_ACC_INIT); -constexpr unsigned countof_NMH_ACC_INIT = sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT); -static const char rand_str[] = "rwgk8M1uxM6XX6c3teQX2yaw8FQWArmcWUSBJ8dcQQJWHYC9Wt2BmpvETxwhYcJTheTbjf49SVRaDJhbEZCq7ki1D6KxpKQSjgwqsiHGSgHLxvPG5kcRnBhjJ1YC8kuhbaJCWn9G6jm58Lr5wkVWNy38KcNjXVM3vExhQK516zntjrHTxSJYNQ1yvGucKZD8M34iBXT86B7y5xK3tu9pLGqgomKEbzvBJmod66BY3tDwRwttjfgB75xGAHMpLELXpArPJHWQxjHPQQgAcL9j4GDLyEmRSS2Fkv1sLwZoBMUmaD84cmKjocK4phgVEr13a65LsjKBw9Pg4VN4hkd149izWLjcA5"; +bool nmhash32_broken( void ) { + static bool done = false, result; + if (done) + return result; -// objsize: 4202f0-420c7d: 2445 -void nmhash32_test ( const void * key, int len, uint32_t seed, void * out ) { - static bool once = false; - if (!once) { - NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)] = { 0 }; - NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accY[sizeof(accX)/sizeof(*accX)] = { 0 }; - - printf("sizeof_NMH_ACC_INIT: %u\ncountof_NMH_ACC_INIT: %u\nsizeof(accX): %u\nsizeof(accY): %u\n", sizeof_NMH_ACC_INIT, countof_NMH_ACC_INIT, sizeof(accX), sizeof(accY)); - - const unsigned count = 256; - static_assert(sizeof(rand_str) >= count); - const uint32_t x = NMHASH32_long(reinterpret_cast(rand_str), count, 42); - printf("NMHASH32_long(rand_str, %u, 42) = %x\n", count, x); - - uint32_t oh; - memcpy(accX, &rand_str[11], sizeof(accX)); - memcpy(accY, &rand_str[23], sizeof(accY)); - - for (unsigned i = 0; i < COUNT_OF(accX); i++) printf("0: %4d %08x\n", i, accX[i]); - const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); - size_t i; - for (i = 0; i < nbGroups * 2; ++i) { - ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; // ??? - } - for (unsigned i = 0; i < COUNT_OF(accX); i++) printf("1: %4d %08x\n", i, accX[i]); -#if 0 - for (i = 0; i < nbGroups; ++i) { - accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; // ??? - } - for (unsigned i = 0; i < COUNT_OF(accX); i++) printf("2: %4d %08x\n", i, accX[i]); -#endif + const char entropy[] = "rwgk8M1uxM6XX6c3teQX2yaw8FQWArmcWUSBJ8dcQQJWHYC9Wt2BmpvETxwhYcJTheTbjf49SVRaDJhbEZCq7ki1D6KxpKQSjgwqsiHGSgHLxvPG5kcRnBhjJ1YC8kuh"; + + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; + static_assert(sizeof(entropy) >= sizeof(accX), "Need more entropy in entropy[]"); + memcpy(accX, entropy, sizeof(accX)); + + const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); + size_t i; - sumhash32(accX, sizeof(accX), 0, &oh); printf("sumhash32(accX) = %x\n", oh); - sumhash32(accY, sizeof(accY), 0, &oh); printf("sumhash32(accY) = %x\n", oh); - NMHASH32_long_round(accX, accY, reinterpret_cast(rand_str)); - sumhash32(accX, sizeof(accX), 0, &oh); printf("-> sumhash32(accX) = %x\n", oh); - sumhash32(accY, sizeof(accY), 0, &oh); printf("-> sumhash32(accY) = %x\n", oh); - static_assert(COUNT_OF(accX) == COUNT_OF(accY) && COUNT_OF(accX) == COUNT_OF(NMH_ACC_INIT)); - for (unsigned i = 0; i < COUNT_OF(accX); i++) - printf("%4d %08x %08x %08x %08x -> %08x %08x\n", - i, NMH_ACC_INIT[i],__NMH_M1_V[i], __NMH_M2_V[i], __NMH_M3_V[i], - accX[i], accY[i]); - once = true; + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; } + // NB: no memory barrier takes place here, just like in NMHASH32_long_round_scalar() + uint32_t acc = 0; + for (i = 0; i < nbGroups; ++i) + acc += accX[i]; + + result = (acc != UINT32_C(0x249abaee)); + done = true; + return result; +} + + +// objsize: 4202f0-420c7d: 2445 +void nmhash32_test ( const void * key, int len, uint32_t seed, void * out ) { const uint32_t v = NMHASH32 (key, (const size_t) len, seed); // printf("NMHASH32(%p, %d, %u) = %u\n", key, len, seed, v); *(uint32_t*)out = v; @@ -1350,6 +1330,14 @@ void nmhash32x_test ( const void * key, int len, uint32_t seed, void * out ) { #ifdef HAVE_KHASHV #include "k-hashv/khashv.h" +#define KHASH_VER_STR "vector:" MACRO_ITOA(KHASH_VECTOR) ", " \ + "scalar:" MACRO_ITOA(KHASHV_SCALAR) ", " \ + "__SSE3__:" MACRO_ITOA(__SSE3__) ", " \ + "__SSE4_1__:" MACRO_ITOA(__SSE4_1__) ", " \ + "__AVX512VL__:" MACRO_ITOA(__AVX512VL__) +const char * const khashv32_desc("Vectorized K-HashV, 32-bit, " KHASH_VER_STR); +const char * const khashv64_desc("Vectorized K-HashV, 64-bit, " KHASH_VER_STR); + khashvSeed khashv_seed; void khashv_seed_init(size_t &seed) { khashv_prep_seed64 (&khashv_seed, seed); diff --git a/Hashes.h b/Hashes.h index c2977969..05c52289 100644 --- a/Hashes.h +++ b/Hashes.h @@ -1315,6 +1315,7 @@ extern "C" { extern const char * const nmhash32_ver; extern const char * const nmhash32x_ver; +bool nmhash32_broken ( void ); void nmhash32_test ( const void * key, int len, uint32_t seed, void * out ); void nmhash32x_test ( const void * key, int len, uint32_t seed, void * out ); @@ -1391,6 +1392,8 @@ inline void khash64_test ( const void *key, int len, uint32_t seed, void *out) { #ifndef HAVE_BIT32 void khashv_seed_init(size_t &seed); // call to khashv_hash_vector not inlined. +extern const char * const khashv32_desc; +extern const char * const khashv64_desc; void khashv32_test ( const void *key, int len, uint32_t seed, void *out); void khashv64_test ( const void *key, int len, uint32_t seed, void *out); #endif diff --git a/main.cpp b/main.cpp index 14c79761..90c88c8f 100644 --- a/main.cpp +++ b/main.cpp @@ -758,8 +758,8 @@ HashInfo g_hashes[] = { rapidhash_test, 64, 0xAF404C4B, "rapidhash", "rapidhash v1", GOOD, {}}, { rapidhash_unrolled_test, 64, 0xAF404C4B, "rapidhash_unrolled", "rapidhash v1 - unrolled", GOOD, {}}, #endif -{ nmhash32_test, 32, 0x12A30553, "nmhash32", nmhash32_ver, GOOD, {}}, -{ nmhash32x_test, 32, 0xA8580227, "nmhash32x", nmhash32x_ver, GOOD, {}}, +{ nmhash32_test, 32, nmhash32_broken() ? 0U : 0x12A30553, "nmhash32", nmhash32_ver, GOOD, {}}, +{ nmhash32x_test, 32, nmhash32_broken() ? 0U : 0xA8580227, "nmhash32x", nmhash32x_ver, GOOD, {}}, #ifdef HAVE_KHASHV // There are certain GCC versions producing 0x9A8F7952 and 0X90A2A4F9 as verification values // for k-hashv32 and k-hashv64. That deserves further investigation.