dice-hash
Hash function for stl types and container
Loading...
Searching...
No Matches
wyhash.h
1#ifndef DICE_HASH_WYHASH_H
2#define DICE_HASH_WYHASH_H
3// This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/)
4// main repo: https://github.com/wangyi-fudan/wyhash
5// author: 王一 Wang Yi <godspeed_china@yeah.net>
6// contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 (Devin), TheOneric
7
8/* quick example:
9 string s="fjsakfdsjkf";
10 uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp);
11*/
12
13#ifndef wyhash_final_version_3
14#define wyhash_final_version_3
15
16#ifndef WYHASH_CONDOM
17//protections that produce different results:
18//1: normal valid behavior
19//2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
20#define WYHASH_CONDOM 2 // dice-hash always uses comdom 2
21#endif
22
23#ifndef WYHASH_32BIT_MUM
24//0: normal version, slow on 32 bit systems
25//1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function
26#define WYHASH_32BIT_MUM 0
27#endif
28
29//includes
30#include <stdint.h>
31#include <string.h>
32#if defined(_MSC_VER) && defined(_M_X64)
33#include <intrin.h>
34#pragma intrinsic(_umul128)
35#endif
36
37//likely and unlikely macros
38#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
39#define _likely_(x) __builtin_expect(x, 1)
40#define _unlikely_(x) __builtin_expect(x, 0)
41#else
42#define _likely_(x) (x)
43#define _unlikely_(x) (x)
44#endif
45namespace dice::hash::wyhash {
46 //128bit multiply function
47 static inline uint64_t _wyrot(uint64_t x) { return (x >> 32) | (x << 32); }
48 static inline void _wymum(uint64_t *A, uint64_t *B) {
49#if (WYHASH_32BIT_MUM)
50 uint64_t hh = (*A >> 32) * (*B >> 32), hl = (*A >> 32) * (uint32_t) *B, lh = (uint32_t) *A * (*B >> 32), ll = (uint64_t) (uint32_t) *A * (uint32_t) *B;
51#if (WYHASH_CONDOM > 1)
52 *A ^= _wyrot(hl) ^ hh;
53 *B ^= _wyrot(lh) ^ ll;
54#else
55 *A = _wyrot(hl) ^ hh;
56 *B = _wyrot(lh) ^ ll;
57#endif
58#elif defined(__SIZEOF_INT128__)
59 __uint128_t r = *A;
60 r *= *B;
61#if (WYHASH_CONDOM > 1)
62 *A ^= (uint64_t) r;
63 *B ^= (uint64_t) (r >> 64);
64#else
65 *A = (uint64_t) r;
66 *B = (uint64_t) (r >> 64);
67#endif
68#elif defined(_MSC_VER) && defined(_M_X64)
69#if (WYHASH_CONDOM > 1)
70 uint64_t a, b;
71 a = _umul128(*A, *B, &b);
72 *A ^= a;
73 *B ^= b;
74#else
75 *A = _umul128(*A, *B, B);
76#endif
77#else
78 uint64_t ha = *A >> 32, hb = *B >> 32, la = (uint32_t) *A, lb = (uint32_t) *B, hi, lo;
79 uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl;
80 lo = t + (rm1 << 32);
81 c += lo < t;
82 hi = rh + (rm0 >> 32) + (rm1 >> 32) + c;
83#if (WYHASH_CONDOM > 1)
84 *A ^= lo;
85 *B ^= hi;
86#else
87 *A = lo;
88 *B = hi;
89#endif
90#endif
91 }
92
93 //multiply and xor mix function, aka MUM
94 static inline uint64_t _wymix(uint64_t A, uint64_t B) {
95 _wymum(&A, &B);
96 return A ^ B;
97 }
98
99//endian macros
100#ifndef WYHASH_LITTLE_ENDIAN
101#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
102#define WYHASH_LITTLE_ENDIAN 1
103#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
104#define WYHASH_LITTLE_ENDIAN 0
105#else
106#warning could not determine endianness! Falling back to little endian.
107#define WYHASH_LITTLE_ENDIAN 1
108#endif
109#endif
110
111//read functions
112#if (WYHASH_LITTLE_ENDIAN)
113 static inline uint64_t _wyr8(const uint8_t *p) {
114 uint64_t v;
115 memcpy(&v, p, 8);
116 return v;
117 }
118 static inline uint64_t _wyr4(const uint8_t *p) {
119 uint32_t v;
120 memcpy(&v, p, 4);
121 return v;
122 }
123#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
124 static inline uint64_t _wyr8(const uint8_t *p) {
125 uint64_t v;
126 memcpy(&v, p, 8);
127 return __builtin_bswap64(v);
128 }
129 static inline uint64_t _wyr4(const uint8_t *p) {
130 uint32_t v;
131 memcpy(&v, p, 4);
132 return __builtin_bswap32(v);
133 }
134#elif defined(_MSC_VER)
135 static inline uint64_t _wyr8(const uint8_t *p) {
136 uint64_t v;
137 memcpy(&v, p, 8);
138 return _byteswap_uint64(v);
139 }
140 static inline uint64_t _wyr4(const uint8_t *p) {
141 uint32_t v;
142 memcpy(&v, p, 4);
143 return _byteswap_ulong(v);
144 }
145#else
146 static inline uint64_t _wyr8(const uint8_t *p) {
147 uint64_t v;
148 memcpy(&v, p, 8);
149 return (((v >> 56) & 0xff) | ((v >> 40) & 0xff00) | ((v >> 24) & 0xff0000) | ((v >> 8) & 0xff000000) | ((v << 8) & 0xff00000000) | ((v << 24) & 0xff0000000000) | ((v << 40) & 0xff000000000000) | ((v << 56) & 0xff00000000000000));
150 }
151 static inline uint64_t _wyr4(const uint8_t *p) {
152 uint32_t v;
153 memcpy(&v, p, 4);
154 return (((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000));
155 }
156#endif
157 static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t) p[0]) << 16) | (((uint64_t) p[k >> 1]) << 8) | p[k - 1]; }
158 //wyhash main function
159 static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret) {
160 const uint8_t *p = (const uint8_t *) key;
161 seed ^= *secret;
162 uint64_t a, b;
163 if (_likely_(len <= 16)) {
164 if (_likely_(len >= 4)) {
165 a = (_wyr4(p) << 32) | _wyr4(p + ((len >> 3) << 2));
166 b = (_wyr4(p + len - 4) << 32) | _wyr4(p + len - 4 - ((len >> 3) << 2));
167 } else if (_likely_(len > 0)) {
168 a = _wyr3(p, len);
169 b = 0;
170 } else
171 a = b = 0;
172 } else {
173 size_t i = len;
174 if (_unlikely_(i > 48)) {
175 uint64_t see1 = seed, see2 = seed;
176 do {
177 seed = _wymix(_wyr8(p) ^ secret[1], _wyr8(p + 8) ^ seed);
178 see1 = _wymix(_wyr8(p + 16) ^ secret[2], _wyr8(p + 24) ^ see1);
179 see2 = _wymix(_wyr8(p + 32) ^ secret[3], _wyr8(p + 40) ^ see2);
180 p += 48;
181 i -= 48;
182 } while (_likely_(i > 48));
183 seed ^= see1 ^ see2;
184 }
185 while (_unlikely_(i > 16)) {
186 seed = _wymix(_wyr8(p) ^ secret[1], _wyr8(p + 8) ^ seed);
187 i -= 16;
188 p += 16;
189 }
190 a = _wyr8(p + i - 16);
191 b = _wyr8(p + i - 8);
192 }
193 return _wymix(secret[1] ^ len, _wymix(a ^ secret[1], b ^ seed));
194 }
195
196 //the default secret parameters
197 static constexpr const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
198
199 //a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand
200 static inline uint64_t wyhash64(uint64_t A, uint64_t B) {
201 A ^= 0xa0761d6478bd642full;
202 B ^= 0xe7037ed1a0b428dbull;
203 _wymum(&A, &B);
204 return _wymix(A ^ 0xa0761d6478bd642full, B ^ 0xe7037ed1a0b428dbull);
205 }
206
207 //The wyrand PRNG that pass BigCrush and PractRand
208 static inline uint64_t wyrand(uint64_t *seed) {
209 *seed += 0xa0761d6478bd642full;
210 return _wymix(*seed, *seed ^ 0xe7037ed1a0b428dbull);
211 }
212
213 //convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash.
214 static inline double wy2u01(uint64_t r) {
215 const double _wynorm = 1.0 / (1ull << 52);
216 return (r >> 12) * _wynorm;
217 }
218
219 //convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash.
220 static inline double wy2gau(uint64_t r) {
221 const double _wynorm = 1.0 / (1ull << 20);
222 return ((r & 0x1fffff) + ((r >> 21) & 0x1fffff) + ((r >> 42) & 0x1fffff)) * _wynorm - 3.0;
223 }
224
225#if (!WYHASH_32BIT_MUM)
226 //fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash.
227 static inline uint64_t wy2u0k(uint64_t r, uint64_t k) {
228 _wymum(&r, &k);
229 return k;
230 }
231#endif
232
233 //make your own secret
234 static inline void make_secret(uint64_t seed, uint64_t *secret) {
235 uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240};
236 for (size_t i = 0; i < 4; i++) {
237 uint8_t ok;
238 do {
239 ok = 1;
240 secret[i] = 0;
241 for (size_t j = 0; j < 64; j += 8) secret[i] |= ((uint64_t) c[wyrand(&seed) % sizeof(c)]) << j;
242 if (secret[i] % 2 == 0) {
243 ok = 0;
244 continue;
245 }
246 for (size_t j = 0; j < i; j++) {
247#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
248 if (__builtin_popcountll(secret[j] ^ secret[i]) != 32) {
249 ok = 0;
250 break;
251 }
252#elif defined(_MSC_VER) && defined(_M_X64)
253 if (_mm_popcnt_u64(secret[j] ^ secret[i]) != 32) {
254 ok = 0;
255 break;
256 }
257#else
258 //manual popcount
259 uint64_t x = secret[j] ^ secret[i];
260 x -= (x >> 1) & 0x5555555555555555;
261 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
262 x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
263 x = (x * 0x0101010101010101) >> 56;
264 if (x != 32) {
265 ok = 0;
266 break;
267 }
268#endif
269 }
270 } while (!ok);
271 }
272 }
273
274#endif
275}
276/* The Unlicense
277This is free and unencumbered software released into the public domain.
278
279Anyone is free to copy, modify, publish, use, compile, sell, or
280distribute this software, either in source code form or as a compiled
281binary, for any purpose, commercial or non-commercial, and by any
282means.
283
284In jurisdictions that recognize copyright laws, the author or authors
285of this software dedicate any and all copyright interest in the
286software to the public domain. We make this dedication for the benefit
287of the public at large and to the detriment of our heirs and
288successors. We intend this dedication to be an overt act of
289relinquishment in perpetuity of all present and future rights to this
290software under copyright law.
291
292THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
293EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
294MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
295IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
296OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
297ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
298OTHER DEALINGS IN THE SOFTWARE.
299
300For more information, please refer to <http://unlicense.org/>
301*/
302
303#endif//DICE_HASH_WYHASH_H