summary refs log tree commit diff homepage
path: root/compat
diff options
context:
space:
mode:
author Stanislaw Halik <sthalik@misaki.pl> 2024-06-08 10:38:46 +0200
committer Stanislaw Halik <sthalik@misaki.pl> 2024-06-08 10:39:44 +0200
commit 7f8d255ab9b468cce6b8bac76c2fb066babde5a4 (patch)
tree 6a6dd4dd1c46e1c66c4610a5e367fb54bff1ef29 /compat
parent c26e4ed3d5c0fa532356365bddd275fba04ae2c3 (diff)
compat/hash: switch from fnv to siphash64 / murmurhash32
Diffstat (limited to 'compat')
-rw-r--r-- compat/hash.cpp 360
-rw-r--r-- compat/hash.hpp 21
2 files changed, 260 insertions, 121 deletions
diff --git a/compat/hash.cpp b/compat/hash.cpp
index 3cb3cc35..7a636079 100644
--- a/compat/hash.cpp
+++ b/compat/hash.cpp
@@ -1,146 +1,302 @@
-#include "compat/defs.hpp"
#include "hash.hpp"
-#include <Corrade/Containers/StringView.h>
+#include <cr/StringView.h>
+#include <cstdint>
#include <bit>
-#include <utility>
-namespace floormat {
+// ReSharper disable CppDefaultCaseNotHandledInSwitchStatement
+#ifdef __GNUG__
+#pragma GCC diagnostic ignored "-Wcast-align"
+#pragma GCC diagnostic ignored "-Wsign-conversion"
+#endif
+namespace floormat::Hash::Murmur {
namespace {
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
-using namespace floormat::Hash;
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
-[[maybe_unused]]
-CORRADE_ALWAYS_INLINE size_t fnvhash_uint_32(uint32_t x)
+CORRADE_ALWAYS_INLINE uint32_t rotl32(uint32_t x, int shift) { return std::rotl(x, shift); }
+#define ROTL std::rotl
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+CORRADE_ALWAYS_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
{
- constexpr auto params = fnvhash_params<sizeof(size_t)*8>{};
- constexpr auto a = params.a, b = params.b;
- auto hash = a;
- const auto* str = (const char*)&x;
+ return p[i];
+}
- hash *= b; hash ^= (uint8_t)*str++; // 0
- hash *= b; hash ^= (uint8_t)*str++; // 1
- hash *= b; hash ^= (uint8_t)*str++; // 2
- hash *= b; hash ^= (uint8_t)*str++; // 3
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
- return hash;
+CORRADE_ALWAYS_INLINE uint32_t fmix32 ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
}
-CORRADE_ALWAYS_INLINE size_t fnvhash_uint_64(uint64_t x)
+//-----------------------------------------------------------------------------
+
+[[maybe_unused]] void MurmurHash3_x86_32 ( const void * __restrict key, size_t len, void * __restrict out )
{
- constexpr auto params = fnvhash_params<sizeof(std::common_type_t<size_t>)*8>{};
- constexpr auto a = params.a, b = params.b;
- auto hash = a;
- const auto* str = (const char*)&x;
-
- hash *= b; hash ^= (uint8_t)*str++; // 0
- hash *= b; hash ^= (uint8_t)*str++; // 1
- hash *= b; hash ^= (uint8_t)*str++; // 2
- hash *= b; hash ^= (uint8_t)*str++; // 3
- hash *= b; hash ^= (uint8_t)*str++; // 4
- hash *= b; hash ^= (uint8_t)*str++; // 5
- hash *= b; hash ^= (uint8_t)*str++; // 6
- hash *= b; hash ^= (uint8_t)*str++; // 7
-
- return hash;
+ constexpr auto seed = uint32_t{0xa6b8edcd};
+ const auto* data = (const uint8_t*)key;
+ const int nblocks = (int)(len / 4);
+
+ uint32_t h1 = seed;
+
+ constexpr uint32_t c1 = 0xcc9e2d51;
+ constexpr uint32_t c2 = 0x1b873593;
+
+ //----------
+ // body
+
+ const auto* blocks = (const uint32_t *)(data + (size_t)(nblocks*4));
+
+ for(int i = -nblocks; i; i++)
+ {
+ uint32_t k1 = getblock32(blocks,i);
+
+ k1 *= c1;
+ k1 = rotl32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = rotl32(h1,13);
+ h1 = h1*5+0xe6546b64;
+ }
+
+ //----------
+ // tail
+
+ const auto* tail = (const uint8_t*)(data + (size_t)(nblocks*4));
+
+ uint32_t k1 = 0;
+
+ switch(len & 3)
+ {
+ case 3: k1 ^= tail[2] << 16; [[fallthrough]];
+ case 2: k1 ^= tail[1] << 8; [[fallthrough]];
+ case 1: k1 ^= tail[0];
+ k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
+ }
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+
+ h1 = fmix32(h1);
+
+ *(uint32_t*)out = h1;
}
} // namespace
+} // namespace floormat::Hash::Murmur
+
+namespace floormat::Hash::SipHash {
+namespace {
-uint32_t hash_32(const void* buf, size_t size) noexcept
+/* default: SipHash-2-4 */
+constexpr inline int cROUNDS = 2, dROUNDS = 4;
+
+CORRADE_ALWAYS_INLINE void U32TO8_LE(uint8_t* __restrict p, uint32_t v)
{
- constexpr auto params = fnvhash_params<32>{};
- constexpr auto a = params.a, b = params.b;
- auto hash = a;
- const auto *str = (const char*)buf, *const end = str + size;
+ (p)[0] = (uint8_t)((v));
+ (p)[1] = (uint8_t)((v) >> 8);
+ (p)[2] = (uint8_t)((v) >> 16);
+ (p)[3] = (uint8_t)((v) >> 24);
+}
-fm_UNROLL_4
- for (; str != end; ++str)
- {
- hash *= b;
- hash ^= (uint8_t)*str;
- }
- return hash;
+CORRADE_ALWAYS_INLINE void U64TO8_LE(uint8_t* __restrict p, uint64_t v)
+{
+ U32TO8_LE((p), (uint32_t)((v)));
+ U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
}
-uint64_t hash_64(const void* buf, size_t size) noexcept
+constexpr CORRADE_ALWAYS_INLINE uint64_t U8TO64_LE(const uint8_t* __restrict p)
{
- constexpr auto params = fnvhash_params<sizeof(size_t)*8>{};
- constexpr auto a = params.a, b = params.b;
- auto hash = a;
- const auto *str = (const char*)buf, *const end = str + size;
+ return (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |
+ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |
+ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |
+ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56));
+}
-fm_UNROLL_8
- for (; str != end; ++str)
- {
- hash *= b;
- hash ^= (uint8_t)*str;
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 32); \
+ } while (0)
+
+struct Key { uint64_t k0, k1; };
+constexpr Key make_key()
+{
+ uint8_t buf[16];
+ for (uint8_t i = 0; i < 16; i++)
+ buf[i] = i;
+ return { U8TO64_LE(buf), U8TO64_LE(buf+8) };
+}
+
+/*
+ Computes a SipHash value
+ *in: pointer to input data (read-only)
+ inlen: input data length in bytes (any size_t value)
+ *k: pointer to the key data (read-only), must be 16 bytes
+ *out: pointer to output data (write-only), outlen bytes must be allocated
+ outlen: length of the output in bytes, must be 8 or 16
+*/
+[[maybe_unused]]
+void siphash(const void * __restrict in, const size_t inlen, uint8_t * __restrict out) {
+
+ const auto* ni = (const unsigned char *)in;
+
+ uint64_t v0 = UINT64_C(0x736f6d6570736575);
+ uint64_t v1 = UINT64_C(0x646f72616e646f6d);
+ uint64_t v2 = UINT64_C(0x6c7967656e657261);
+ uint64_t v3 = UINT64_C(0x7465646279746573);
+ //uint64_t k0 = U8TO64_LE(kk);
+ //uint64_t k1 = U8TO64_LE(kk + 8);
+ constexpr auto key_ = make_key();
+ constexpr uint64_t k0 = key_.k0;
+ constexpr uint64_t k1 = key_.k1;
+ uint64_t m;
+ int i;
+ const uint8_t *end = ni + inlen - (inlen % sizeof(uint64_t));
+ const auto left = (uint32_t)(inlen & 7);
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+#if 0
+ if (outlen == 16)
+ v1 ^= 0xee;
+#endif
+
+ for (; ni != end; ni += 8) {
+ m = U8TO64_LE(ni);
+ v3 ^= m;
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7:
+ b |= ((uint64_t)ni[6]) << 48; [[fallthrough]];
+ case 6:
+ b |= ((uint64_t)ni[5]) << 40; [[fallthrough]];
+ case 5:
+ b |= ((uint64_t)ni[4]) << 32; [[fallthrough]];
+ case 4:
+ b |= ((uint64_t)ni[3]) << 24; [[fallthrough]];
+ case 3:
+ b |= ((uint64_t)ni[2]) << 16; [[fallthrough]];
+ case 2:
+ b |= ((uint64_t)ni[1]) << 8; [[fallthrough]];
+ case 1:
+ b |= ((uint64_t)ni[0]); [[fallthrough]];
+ case 0:
+ break;
}
- return hash;
+
+ v3 ^= b;
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= b;
+
+#if 0
+ if (outlen == 16)
+ v2 ^= 0xee;
+ else
+#endif
+ v2 ^= 0xff;
+
+ for (i = 0; i < dROUNDS; ++i)
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+ U64TO8_LE(out, b);
+
+#if 0
+ if (outlen == 8)
+ return 0;
+
+ v1 ^= 0xdd;
+
+ TRACE;
+ for (i = 0; i < dROUNDS; ++i)
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+ U64TO8_LE(out + 8, b);
+#endif
}
-size_t hash_buf(const void* buf, size_t size) noexcept
+} // namespace
+} // namespace floormat::Hash::SipHash
+
+namespace floormat {
+
+size_t hash_buf(const void* __restrict buf, size_t size) noexcept
{
#if 1
if constexpr(sizeof nullptr > 4)
- return hash_64(buf, size);
+ {
+ uint64_t ret = 0;
+ Hash::SipHash::siphash(buf, size, reinterpret_cast<uint8_t*>(&ret));
+ return size_t{ret};
+ }
else
#endif
- return hash_32(buf, size);
+ {
+ uint32_t ret;
+ Hash::Murmur::MurmurHash3_x86_32(buf, size, &ret);
+ return size_t{ret};
+ }
}
size_t hash_int(uint32_t x) noexcept
{
-#if 1
- if constexpr(sizeof nullptr > 4)
- return fnvhash_uint_64(x);
- else
-#endif
- return fnvhash_uint_32(x);
+ return hash_buf(&x, sizeof x);
}
size_t hash_int(uint64_t x) noexcept
{
- return fnvhash_uint_64(x);
+ return hash_buf(&x, sizeof x);
}
-size_t hash_string_view::operator()(StringView s) const noexcept { return Hash::fnvhash_buf(s.data(), s.size()); }
-
-} // namespace floormat
-
-namespace floormat::Hash {
-
-size_t fnvhash_buf(const void* __restrict buf, size_t size, size_t seed) noexcept
+size_t hash_string_view::operator()(StringView str) const noexcept
{
- constexpr size_t b{fnvhash_params<sizeof nullptr*8>::b};
- size_t full_rounds = size / 8, rest = size % 8;
- size_t hash = seed;
- const char* str = (const char*)buf;
-
- while (full_rounds--)
- {
- hash *= b; hash ^= (uint8_t)*str++; // 0
- hash *= b; hash ^= (uint8_t)*str++; // 1
- hash *= b; hash ^= (uint8_t)*str++; // 2
- hash *= b; hash ^= (uint8_t)*str++; // 3
- hash *= b; hash ^= (uint8_t)*str++; // 4
- hash *= b; hash ^= (uint8_t)*str++; // 5
- hash *= b; hash ^= (uint8_t)*str++; // 6
- hash *= b; hash ^= (uint8_t)*str++; // 7
- }
- switch (rest)
- {
- case 7: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 6: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 5: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 4: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 3: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 2: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 1: hash *= b; hash ^= (uint8_t)*str++; [[fallthrough]];
- case 0: break;
- default: std::unreachable();
- }
- return hash;
+ return hash_buf(str.data(), str.size());
}
-} // namespace floormat::Hash
+} // namespace floormat
diff --git a/compat/hash.hpp b/compat/hash.hpp
index c512d249..a9e9b355 100644
--- a/compat/hash.hpp
+++ b/compat/hash.hpp
@@ -1,25 +1,8 @@
#pragma once
-// todo rename to hash-fnv.hpp
-
-namespace floormat::Hash {
-
-template<size_t N = sizeof nullptr * 8> struct fnvhash_params;
-template<> struct fnvhash_params<32> { static constexpr uint32_t a = 0x811c9dc5u, b = 0x01000193u; };
-template<> struct fnvhash_params<64> { static constexpr uint64_t a = 0xcbf29ce484222325u, b = 0x100000001b3u; };
-
-constexpr inline size_t fnvhash_seed = fnvhash_params<>::a;
-
-size_t fnvhash_buf(const void* __restrict buf, size_t size, size_t seed = fnvhash_seed) noexcept;
-
-} // namespace floormat::Hash
-
-namespace floormat { // todo
-
-uint64_t hash_64(const void* buf, size_t size) noexcept;
-uint32_t hash_32(const void* buf, size_t size) noexcept;
-size_t hash_buf(const void* buf, size_t size) noexcept;
+namespace floormat {
+size_t hash_buf(const void* __restrict buf, size_t size) noexcept;
size_t hash_int(uint32_t x) noexcept;
size_t hash_int(uint64_t x) noexcept;