diff options
| field | value | date |
|---|---|---|
| author | Stanislaw Halik <sthalik@misaki.pl> | 2024-06-08 10:38:46 +0200 |
| committer | Stanislaw Halik <sthalik@misaki.pl> | 2024-06-08 10:39:44 +0200 |
| commit | 7f8d255ab9b468cce6b8bac76c2fb066babde5a4 (patch) | |
| tree | 6a6dd4dd1c46e1c66c4610a5e367fb54bff1ef29 /compat | |
| parent | c26e4ed3d5c0fa532356365bddd275fba04ae2c3 (diff) | |
compat/hash: switch from fnv to siphash64 / murmurhash32
Diffstat (limited to 'compat')
-rw-r--r-- | compat/hash.cpp | 360 |
-rw-r--r-- | compat/hash.hpp | 21 |
2 files changed, 260 insertions, 121 deletions
diff --git a/compat/hash.cpp b/compat/hash.cpp index 3cb3cc35..7a636079 100644 --- a/compat/hash.cpp +++ b/compat/hash.cpp @@ -1,146 +1,302 @@ -#include "compat/defs.hpp" #include "hash.hpp" -#include <Corrade/Containers/StringView.h> +#include <cr/StringView.h> +#include <cstdint> #include <bit> -#include <utility> -namespace floormat { +// ReSharper disable CppDefaultCaseNotHandledInSwitchStatement +#ifdef __GNUG__ +#pragma GCC diagnostic ignored "-Wcast-align" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif +namespace floormat::Hash::Murmur { namespace { +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. -using namespace floormat::Hash; +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. 
-[[maybe_unused]] -CORRADE_ALWAYS_INLINE size_t fnvhash_uint_32(uint32_t x) +CORRADE_ALWAYS_INLINE uint32_t rotl32(uint32_t x, int shift) { return std::rotl(x, shift); } +#define ROTL std::rotl + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +CORRADE_ALWAYS_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) { - constexpr auto params = fnvhash_params<sizeof(size_t)*8>{}; - constexpr auto a = params.a, b = params.b; - auto hash = a; - const auto* str = (const char*)&x; + return p[i]; +} - hash *= b; hash ^= (uint8_t)*str++; // 0 - hash *= b; hash ^= (uint8_t)*str++; // 1 - hash *= b; hash ^= (uint8_t)*str++; // 2 - hash *= b; hash ^= (uint8_t)*str++; // 3 +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche - return hash; +CORRADE_ALWAYS_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; } -CORRADE_ALWAYS_INLINE size_t fnvhash_uint_64(uint64_t x) +//----------------------------------------------------------------------------- + +[[maybe_unused]] void MurmurHash3_x86_32 ( const void * __restrict key, size_t len, void * __restrict out ) { - constexpr auto params = fnvhash_params<sizeof(std::common_type_t<size_t>)*8>{}; - constexpr auto a = params.a, b = params.b; - auto hash = a; - const auto* str = (const char*)&x; - - hash *= b; hash ^= (uint8_t)*str++; // 0 - hash *= b; hash ^= (uint8_t)*str++; // 1 - hash *= b; hash ^= (uint8_t)*str++; // 2 - hash *= b; hash ^= (uint8_t)*str++; // 3 - hash *= b; hash ^= (uint8_t)*str++; // 4 - hash *= b; hash ^= (uint8_t)*str++; // 5 - hash *= b; hash ^= (uint8_t)*str++; // 6 - hash *= b; hash ^= (uint8_t)*str++; // 7 - - return hash; + constexpr auto seed = uint32_t{0xa6b8edcd}; + const 
auto* data = (const uint8_t*)key; + const int nblocks = (int)(len / 4); + + uint32_t h1 = seed; + + constexpr uint32_t c1 = 0xcc9e2d51; + constexpr uint32_t c2 = 0x1b873593; + + //---------- + // body + + const auto* blocks = (const uint32_t *)(data + (size_t)(nblocks*4)); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i); + + k1 *= c1; + k1 = rotl32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = rotl32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const auto* tail = (const uint8_t*)(data + (size_t)(nblocks*4)); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; [[fallthrough]]; + case 2: k1 ^= tail[1] << 8; [[fallthrough]]; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1; + } + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; } } // namespace +} // namespace floormat::Hash::Murmur + +namespace floormat::Hash::SipHash { +namespace { -uint32_t hash_32(const void* buf, size_t size) noexcept +/* default: SipHash-2-4 */ +constexpr inline int cROUNDS = 2, dROUNDS = 4; + +CORRADE_ALWAYS_INLINE void U32TO8_LE(uint8_t* __restrict p, uint32_t v) { - constexpr auto params = fnvhash_params<32>{}; - constexpr auto a = params.a, b = params.b; - auto hash = a; - const auto *str = (const char*)buf, *const end = str + size; + (p)[0] = (uint8_t)((v)); + (p)[1] = (uint8_t)((v) >> 8); + (p)[2] = (uint8_t)((v) >> 16); + (p)[3] = (uint8_t)((v) >> 24); +} -fm_UNROLL_4 - for (; str != end; ++str) - { - hash *= b; - hash ^= (uint8_t)*str; - } - return hash; +CORRADE_ALWAYS_INLINE void U64TO8_LE(uint8_t* __restrict p, uint64_t v) +{ + U32TO8_LE((p), (uint32_t)((v))); + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); } -uint64_t hash_64(const void* buf, size_t size) noexcept +constexpr CORRADE_ALWAYS_INLINE uint64_t U8TO64_LE(const uint8_t* __restrict p) { - constexpr auto params = fnvhash_params<sizeof(size_t)*8>{}; - constexpr auto a = params.a, b = 
params.b; - auto hash = a; - const auto *str = (const char*)buf, *const end = str + size; + return (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)); +} -fm_UNROLL_8 - for (; str != end; ++str) - { - hash *= b; - hash ^= (uint8_t)*str; +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +struct Key { uint64_t k0, k1; }; +constexpr Key make_key() +{ + uint8_t buf[16]; + for (uint8_t i = 0; i < 16; i++) + buf[i] = i; + return { U8TO64_LE(buf), U8TO64_LE(buf+8) }; +} + +/* + Computes a SipHash value + *in: pointer to input data (read-only) + inlen: input data length in bytes (any size_t value) + *k: pointer to the key data (read-only), must be 16 bytes + *out: pointer to output data (write-only), outlen bytes must be allocated + outlen: length of the output in bytes, must be 8 or 16 +*/ +[[maybe_unused]] +void siphash(const void * __restrict in, const size_t inlen, uint8_t * __restrict out) { + + const auto* ni = (const unsigned char *)in; + + uint64_t v0 = UINT64_C(0x736f6d6570736575); + uint64_t v1 = UINT64_C(0x646f72616e646f6d); + uint64_t v2 = UINT64_C(0x6c7967656e657261); + uint64_t v3 = UINT64_C(0x7465646279746573); + //uint64_t k0 = U8TO64_LE(kk); + //uint64_t k1 = U8TO64_LE(kk + 8); + constexpr auto key_ = make_key(); + constexpr uint64_t k0 = key_.k0; + constexpr uint64_t k1 = key_.k1; + uint64_t m; + int i; + const uint8_t *end = ni + inlen - (inlen % sizeof(uint64_t)); + const auto left = (uint32_t)(inlen & 7); + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + +#if 0 + if (outlen == 16) + v1 ^= 0xee; 
+#endif + + for (; ni != end; ni += 8) { + m = U8TO64_LE(ni); + v3 ^= m; + + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: + b |= ((uint64_t)ni[6]) << 48; [[fallthrough]]; + case 6: + b |= ((uint64_t)ni[5]) << 40; [[fallthrough]]; + case 5: + b |= ((uint64_t)ni[4]) << 32; [[fallthrough]]; + case 4: + b |= ((uint64_t)ni[3]) << 24; [[fallthrough]]; + case 3: + b |= ((uint64_t)ni[2]) << 16; [[fallthrough]]; + case 2: + b |= ((uint64_t)ni[1]) << 8; [[fallthrough]]; + case 1: + b |= ((uint64_t)ni[0]); [[fallthrough]]; + case 0: + break; } - return hash; + + v3 ^= b; + + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= b; + +#if 0 + if (outlen == 16) + v2 ^= 0xee; + else +#endif + v2 ^= 0xff; + + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + +#if 0 + if (outlen == 8) + return 0; + + v1 ^= 0xdd; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out + 8, b); +#endif } -size_t hash_buf(const void* buf, size_t size) noexcept +} // namespace +} // namespace floormat::Hash::SipHash + +namespace floormat { + +size_t hash_buf(const void* __restrict buf, size_t size) noexcept { #if 1 if constexpr(sizeof nullptr > 4) - return hash_64(buf, size); + { + uint64_t ret = 0; + Hash::SipHash::siphash(buf, size, reinterpret_cast<uint8_t*>(&ret)); + return size_t{ret}; + } else #endif - return hash_32(buf, size); + { + uint32_t ret; + Hash::Murmur::MurmurHash3_x86_32(buf, size, &ret); + return size_t{ret}; + } } size_t hash_int(uint32_t x) noexcept { -#if 1 - if constexpr(sizeof nullptr > 4) - return fnvhash_uint_64(x); - else -#endif - return fnvhash_uint_32(x); + return hash_buf(&x, sizeof x); } size_t hash_int(uint64_t x) noexcept { - return fnvhash_uint_64(x); + return hash_buf(&x, sizeof x); } -size_t hash_string_view::operator()(StringView s) const noexcept { return Hash::fnvhash_buf(s.data(), s.size()); } - -} // namespace floormat - -namespace 
floormat::Hash { - -size_t fnvhash_buf(const void* __restrict buf, size_t size, size_t seed) noexcept +size_t hash_string_view::operator()(StringView str) const noexcept { - constexpr size_t b{fnvhash_params<sizeof nullptr*8>::b}; - size_t full_rounds = size / 8, rest = size % 8; - size_t hash = seed; - const char* str = (const char*)buf; - - while (full_rounds--) - { - hash *= b; hash ^= (uint8_t)*str++; // 0 - hash *= b; hash ^= (uint8_t)*str++; // 1 - hash *= b; hash ^= (uint8_t)*str++; // 2 - hash *= b; hash ^= (uint8_t)*str++; // 3 - hash *= b; hash ^= (uint8_t)*str++; // 4 - hash *= b; hash ^= (uint8_t)*str++; // 5 - hash *= b; hash ^= (uint8_t)*str++; // 6 - hash *= b; hash ^= (uint8_t)*str++; // 7 - } - switch (rest) - { - case 7: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 6: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 5: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 4: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 3: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 2: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 1: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]]; - case 0: break; - default: std::unreachable(); - } - return hash; + return hash_buf(str.data(), str.size()); } -} // namespace floormat::Hash +} // namespace floormat diff --git a/compat/hash.hpp b/compat/hash.hpp index c512d249..a9e9b355 100644 --- a/compat/hash.hpp +++ b/compat/hash.hpp @@ -1,25 +1,8 @@ #pragma once -// todo rename to hash-fnv.hpp - -namespace floormat::Hash { - -template<size_t N = sizeof nullptr * 8> struct fnvhash_params; -template<> struct fnvhash_params<32> { static constexpr uint32_t a = 0x811c9dc5u, b = 0x01000193u; }; -template<> struct fnvhash_params<64> { static constexpr uint64_t a = 0xcbf29ce484222325u, b = 0x100000001b3u; }; - -constexpr inline size_t fnvhash_seed = fnvhash_params<>::a; - -size_t fnvhash_buf(const void* __restrict buf, size_t size, size_t seed = fnvhash_seed) 
noexcept; - -} // namespace floormat::Hash - -namespace floormat { // todo - -uint64_t hash_64(const void* buf, size_t size) noexcept; -uint32_t hash_32(const void* buf, size_t size) noexcept; -size_t hash_buf(const void* buf, size_t size) noexcept; +namespace floormat { +size_t hash_buf(const void* __restrict buf, size_t size) noexcept; size_t hash_int(uint32_t x) noexcept; size_t hash_int(uint64_t x) noexcept; |