1 module rt.compiler.util.hash; 2 3 /// hashes bStart[0..length] bytes 4 extern(C) hash_t rt_hash_str(void *bStart,size_t length, hash_t seed=0){ 5 static if (is(hash_t==uint)||is(hash_t==int)){ 6 return cast(hash_t)murmurHashAligned2(bStart,length,cast(uint)seed); 7 } else static if (is(hash_t==ulong)||is(hash_t==long)){ 8 return cast(hash_t)lookup8_hash(cast(ubyte*)bStart,length,cast(ulong)seed); 9 } else { 10 static assert(0,"unsupported type for hash_t: "~hash_t.stringof); 11 } 12 } 13 14 /// hashes bStart[0..length] size_t 15 extern(C) hash_t rt_hash_block(size_t *bStart,size_t length, hash_t seed=0){ 16 static if (is(hash_t==uint)||is(hash_t==int)){ 17 return cast(hash_t)murmurHashAligned2(bStart,length,cast(uint)seed); 18 } else static if (is(hash_t==ulong)||is(hash_t==long)){ 19 return cast(hash_t)lookup8_hash2(cast(ulong*)bStart,length,cast(ulong)seed); 20 } else { 21 static assert(0,"unsupported type for hash_t: "~hash_t.stringof); 22 } 23 } 24 25 /// hashes UTF-8 chars using the UTF codepoints (slower than rt_hash_str that uses the bit representation) 26 extern(C) uint rt_hash_utf8(char[] str, uint seed=0){ 27 const uint m = 0x5bd1e995; 28 const int r = 24; 29 30 uint h = seed ; // ^ length 31 foreach (c;str) 32 { 33 uint k = cast(uint)c; 34 // MIX(h,k,m); 35 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 36 } 37 h ^= h >> 13; 38 h *= m; 39 h ^= h >> 15; 40 return h; 41 } 42 43 /// hashes UTF-16 chars using the UTF codepoints (slower than rt_hash_str that uses the bit representation) 44 extern(C) uint rt_hash_utf16(wchar[] str, uint seed=0){ 45 const uint m = 0x5bd1e995; 46 const int r = 24; 47 48 uint h = seed ; // ^ length 49 foreach (c;str) 50 { 51 uint k = cast(uint)c; 52 // MIX(h,k,m); 53 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 54 } 55 h ^= h >> 13; 56 h *= m; 57 h ^= h >> 15; 58 return h; 59 } 60 61 /// hashes UTF-32 chars using the UTF codepoints (should be equivalent to rt_hash_str that uses the bit representation) 62 extern(C) uint rt_hash_utf32(dchar[] str, uint seed=0){ 63 const uint m = 0x5bd1e995; 64 const int r = 24; 65 66 uint h = seed ; // ^ length 67 foreach (c;str) 68 { 69 uint k = cast(uint)c; 70 // MIX(h,k,m); 71 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 72 } 73 h ^= h >> 13; 74 h *= m; 75 h ^= h >> 15; 76 return h; 77 } 78 79 /// combines two hash values 80 extern(C) hash_t rt_hash_combine( hash_t val1, hash_t val2 ){ 81 static if (is(hash_t==uint)||is(hash_t==int)){ 82 return cast(hash_t)rt_hash_combine32(cast(uint)val1,cast(uint)val2); 83 } else static if (is(hash_t==ulong)||is(hash_t==long)){ 84 return cast(hash_t)rt_hash_combine64(cast(ulong)val1,cast(ulong)val2); 85 } else { 86 static assert(0,"unsupported type for hash_t: "~hash_t.stringof); 87 } 88 } 89 90 /// hashes bStart[0..length] in an architecture neutral way 91 extern(C) uint rt_hash_str_neutral32(void *bStart,uint length, uint seed=0){ 92 return murmurHashNeutral2(bStart,length,seed); 93 } 94 95 /// hashes bStart[0..length] in an architecture neutral way 96 extern(C) ulong rt_hash_str_neutral64(void *bStart,ulong length, ulong seed=0){ 97 return cast(hash_t)lookup8_hash(cast(ubyte*)bStart,length,seed); 98 } 99 100 /// combines two hash values (32 bit, architecture neutral) 101 extern(C) uint rt_hash_combine32( uint val, uint seed ) 102 { 103 const uint m = 0x5bd1e995; 104 const int r = 24; 105 106 uint h = seed ^ 4u; 107 ubyte * data = cast(ubyte *)&val; 108 uint k; 109 110 k = (cast(uint)data[0]); 111 k |= (cast(uint)data[1]) << 8; 112 k |= (cast(uint)data[2]) << 16; 113 k |= (cast(uint)data[3]) << 24; 114 k *= m; 115 k ^= k >> r; 116 k *= m; 117 h *= m; 118 h ^= k; 119 h ^= h >> 13; 120 h *= m; 121 h ^= h >> 15; 122 123 return h; 124 } 125 126 /// combines two hash values (64 bit, architecture neutral) 127 extern(C) ulong rt_hash_combine64( ulong value, ulong level) 128 { 129 ulong a,b,c,len; 130 ubyte* k=cast(ubyte*)(&value); 131 132 /* Set up the internal state */ 133 a = b = level; /* the previous hash value */ 134 c = 0x9e3779b97f4a7c13UL; /* the golden ratio; an arbitrary value */ 135 c += 8; 136 a+=(cast(ulong)k[ 7])<<56; 137 a+=(cast(ulong)k[ 6])<<48; 138 a+=(cast(ulong)k[ 5])<<40; 139 a+=(cast(ulong)k[ 4])<<32; 140 a+=(cast(ulong)k[ 3])<<24; 141 a+=(cast(ulong)k[ 2])<<16; 142 a+=(cast(ulong)k[ 1])<<8; 143 a+=(cast(ulong)k[ 0]); 144 mixin(mix64abc); 145 146 return c; 147 } 148 149 private: 150 151 //----------------------------------------------------------------------------- 152 // murmurHashAligned2, by Austin Appleby 153 154 // Same algorithm as murmurHash2, but only does aligned reads - should be safer 155 // on certain platforms. 156 157 // Performance will be lower than murmurHash2 158 private uint murmurHashAligned2 ( void * key, uint len, uint seed ) 159 { 160 const uint m = 0x5bd1e995; 161 const int r = 24; 162 163 ubyte * data = cast(ubyte *)key; 164 165 uint h = seed ^ len; 166 167 int alignV = cast(int)data & 3; 168 169 if(alignV && (len >= 4)) 170 { 171 // Pre-load the temp registers 172 173 uint t = 0, d = 0; 174 175 switch(alignV) 176 { 177 case 1: t |= (cast(uint)data[2]) << 16; 178 case 2: t |= (cast(uint)data[1]) << 8; 179 case 3: t |= cast(uint)data[0]; 180 default: 181 } 182 183 t <<= (8 * alignV); 184 185 data += 4-alignV; 186 len -= 4-alignV; 187 188 int sl = 8 * (4-alignV); 189 int sr = 8 * alignV; 190 191 // Mix 192 193 while(len >= 4) 194 { 195 d = *cast(uint *)data; 196 t = (t >> sr) | (d << sl); 197 198 uint k = t; 199 200 // MIX(h,k,m); 201 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 202 203 204 t = d; 205 206 data += 4; 207 len -= 4; 208 } 209 210 // Handle leftover data in temp registers 211 212 d = 0; 213 214 if(len >= alignV) 215 { 216 switch(alignV) 217 { 218 case 3: d |= (cast(uint)data[2]) << 16; 219 case 2: d |= (cast(uint)data[1]) << 8; 220 case 1: d |= (cast(uint)data[0]); 221 default: 222 } 223 224 uint k = (t >> sr) | (d << sl); 225 // MIX(h,k,m); 226 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 227 228 data += alignV; 229 len -= alignV; 230 231 //---------- 232 // Handle tail bytes 233 234 switch(len) 235 { 236 case 3: h ^= (cast(uint)data[2]) << 16; 237 case 2: h ^= (cast(uint)data[1]) << 8; 238 case 1: h ^= (cast(uint)data[0]); 239 h *= m; 240 default: 241 }; 242 } 243 else 244 { 245 switch(len) 246 { 247 case 3: d |= (cast(uint)data[2]) << 16; 248 case 2: d |= (cast(uint)data[1]) << 8; 249 case 1: d |= (cast(uint)data[0]); 250 case 0: h ^= (t >> sr) | (d << sl); 251 h *= m; 252 default: 253 } 254 } 255 256 h ^= h >> 13; 257 h *= m; 258 h ^= h >> 15; 259 260 return h; 261 } 262 else 263 { 264 while(len >= 4) 265 { 266 uint k = *cast(uint *)data; 267 268 // MIX(h,k,m); 269 { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 270 271 data += 4; 272 len -= 4; 273 } 274 275 //---------- 276 // Handle tail bytes 277 278 switch(len) 279 { 280 case 3: h ^= (cast(uint)data[2]) << 16; 281 case 2: h ^= (cast(uint)data[1]) << 8; 282 case 1: h ^= (cast(uint)data[0]); 283 h *= m; 284 default: 285 }; 286 287 h ^= h >> 13; 288 h *= m; 289 h ^= h >> 15; 290 291 return h; 292 } 293 } 294 //----------------------------------------------------------------------------- 295 // murmurHashNeutral2, by Austin Appleby 296 297 // Same as murmurHash2, but endian- and alignment-neutral. 298 // Half the speed though, alas. 299 300 private uint murmurHashNeutral2 ( void * key, uint len, uint seed ) 301 { 302 const uint m = 0x5bd1e995; 303 const int r = 24; 304 305 uint h = seed ^ len; 306 307 ubyte * data = cast(ubyte *)key; 308 309 while(len >= 4) 310 { 311 uint k; 312 313 k = data[0]; 314 k |= (cast(uint)data[1]) << 8; 315 k |= (cast(uint)data[2]) << 16; 316 k |= (cast(uint)data[3]) << 24; 317 318 k *= m; 319 k ^= k >> r; 320 k *= m; 321 322 h *= m; 323 h ^= k; 324 325 data += 4; 326 len -= 4; 327 } 328 329 switch(len) 330 { 331 case 3: h ^= (cast(uint)data[2]) << 16; 332 case 2: h ^= (cast(uint)data[1]) << 8; 333 case 1: h ^= (cast(uint)data[0]); 334 h *= m; 335 default: 336 }; 337 338 h ^= h >> 13; 339 h *= m; 340 h ^= h >> 15; 341 342 return h; 343 } 344 345 //#define hashsize(n) (cast(ulong)1<<(n)) 346 //#define hashmask(n) (hashsize(n)-1) 347 348 /* 349 -------------------------------------------------------------------- 350 mix -- mix 3 64-bit values reversibly. 351 mix() takes 48 machine instructions, but only 24 cycles on a superscalar 352 machine (like Intel's new MMX architecture). It requires 4 64-bit 353 registers for 4::2 parallelism. 354 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of 355 (a,b,c), and all deltas of bottom bits were tested. All deltas were 356 tested both on random keys and on keys that were nearly all zero. 357 These deltas all cause every bit of c to change between 1/3 and 2/3 358 of the time (well, only 113/400 to 287/400 of the time for some 359 2-bit delta). These deltas all cause at least 80 bits to change 360 among (a,b,c) when the mix is run either forward or backward (yes it 361 is reversible). 362 This implies that a hash using mix64 has no funnels. There may be 363 characteristics with 3-bit deltas or bigger, I didn't test for 364 those. 365 -------------------------------------------------------------------- 366 */ 367 const char[] mix64abc=`{ 368 a -= b; a -= c; a ^= (c>>43); 369 b -= c; b -= a; b ^= (a<<9); 370 c -= a; c -= b; c ^= (b>>8); 371 a -= b; a -= c; a ^= (c>>38); 372 b -= c; b -= a; b ^= (a<<23); 373 c -= a; c -= b; c ^= (b>>5); 374 a -= b; a -= c; a ^= (c>>35); 375 b -= c; b -= a; b ^= (a<<49); 376 c -= a; c -= b; c ^= (b>>11); 377 a -= b; a -= c; a ^= (c>>12); 378 b -= c; b -= a; b ^= (a<<18); 379 c -= a; c -= b; c ^= (b>>22); 380 }`; 381 382 /* 383 -------------------------------------------------------------------- 384 hash() -- hash a variable-length key into a 64-bit value 385 k : the key (the unaligned variable-length array of bytes) 386 len : the length of the key, counting by bytes 387 level : can be any 8-byte value 388 Returns a 64-bit value. Every bit of the key affects every bit of 389 the return value. No funnels. Every 1-bit and 2-bit delta achieves 390 avalanche. About 41+5len instructions. 391 392 The best hash table sizes are powers of 2. There is no need to do 393 mod a prime (mod is sooo slow!). If you need less than 64 bits, 394 use a bitmask. For example, if you need only 10 bits, do 395 h = (h & hashmask(10)); 396 In which case, the hash table should have hashsize(10) elements. 397 398 If you are hashing n strings (ub1 **)k, do it like this: 399 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); 400 401 By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may 402 use this code any way you wish, private, educational, or commercial, 403 but I would appreciate if you give me credit. 404 405 See http://burtleburtle.net/bob/hash/evahash.html 406 Use for hash table lookup, or anything where one collision in 2^^64 407 is acceptable. Do NOT use for cryptographic purposes. 408 -------------------------------------------------------------------- 409 */ 410 411 ulong lookup8_hash( ubyte* k, ulong length, ulong level) 412 { 413 ulong a,b,c,len; 414 415 /* Set up the internal state */ 416 len = length; 417 a = b = level; /* the previous hash value */ 418 c = 0x9e3779b97f4a7c13UL; /* the golden ratio; an arbitrary value */ 419 420 /*---------------------------------------- handle most of the key */ 421 while (len >= 24) 422 { 423 a += (k[0] +(cast(ulong)k[ 1]<< 8)+(cast(ulong)k[ 2]<<16)+(cast(ulong)k[ 3]<<24) 424 +(cast(ulong)k[4 ]<<32)+(cast(ulong)k[ 5]<<40)+(cast(ulong)k[ 6]<<48)+(cast(ulong)k[ 7]<<56)); 425 b += (k[8] +(cast(ulong)k[ 9]<< 8)+(cast(ulong)k[10]<<16)+(cast(ulong)k[11]<<24) 426 +(cast(ulong)k[12]<<32)+(cast(ulong)k[13]<<40)+(cast(ulong)k[14]<<48)+(cast(ulong)k[15]<<56)); 427 c += (k[16] +(cast(ulong)k[17]<< 8)+(cast(ulong)k[18]<<16)+(cast(ulong)k[19]<<24) 428 +(cast(ulong)k[20]<<32)+(cast(ulong)k[21]<<40)+(cast(ulong)k[22]<<48)+(cast(ulong)k[23]<<56)); 429 mixin(mix64abc); 430 k += 24; len -= 24; 431 } 432 433 /*------------------------------------- handle the last 23 bytes */ 434 c += length; 435 switch(len) /* all the case statements fall through */ 436 { 437 case 23: c+=(cast(ulong)k[22]<<56); 438 case 22: c+=(cast(ulong)k[21]<<48); 439 case 21: c+=(cast(ulong)k[20]<<40); 440 case 20: c+=(cast(ulong)k[19]<<32); 441 case 19: c+=(cast(ulong)k[18]<<24); 442 case 18: c+=(cast(ulong)k[17]<<16); 443 case 17: c+=(cast(ulong)k[16]<<8); 444 /* the first byte of c is reserved for the length */ 445 case 16: b+=(cast(ulong)k[15]<<56); 446 case 15: b+=(cast(ulong)k[14]<<48); 447 case 14: b+=(cast(ulong)k[13]<<40); 448 case 13: b+=(cast(ulong)k[12]<<32); 449 case 12: b+=(cast(ulong)k[11]<<24); 450 case 11: b+=(cast(ulong)k[10]<<16); 451 case 10: b+=(cast(ulong)k[ 9]<<8); 452 case 9: b+=(cast(ulong)k[ 8]); 453 case 8: a+=(cast(ulong)k[ 7]<<56); 454 case 7: a+=(cast(ulong)k[ 6]<<48); 455 case 6: a+=(cast(ulong)k[ 5]<<40); 456 case 5: a+=(cast(ulong)k[ 4]<<32); 457 case 4: a+=(cast(ulong)k[ 3]<<24); 458 case 3: a+=(cast(ulong)k[ 2]<<16); 459 case 2: a+=(cast(ulong)k[ 1]<<8); 460 case 1: a+=(cast(ulong)k[ 0]); 461 /* case 0: nothing left to add */ 462 default: 463 } 464 mixin(mix64abc); 465 /*-------------------------------------------- report the result */ 466 return c; 467 } 468 469 /* 470 -------------------------------------------------------------------- 471 This works on all machines, is identical to hash() on little-endian 472 machines, and it is much faster than hash(), but it requires 473 -- that the key be an array of ub8's, and 474 -- that all your machines have the same endianness, and 475 -- that the length be the number of ub8's in the key 476 -------------------------------------------------------------------- 477 */ 478 ulong lookup8_hash2( ulong *k, ulong length, ulong level) 479 { 480 ulong a,b,c,len; 481 482 /* Set up the internal state */ 483 len = length; 484 a = b = level; /* the previous hash value */ 485 c = 0x9e3779b97f4a7c13UL; /* the golden ratio; an arbitrary value */ 486 487 /*---------------------------------------- handle most of the key */ 488 while (len >= 3) 489 { 490 a += k[0]; 491 b += k[1]; 492 c += k[2]; 493 mixin(mix64abc); 494 k += 3; len -= 3; 495 } 496 497 /*-------------------------------------- handle the last 2 ub8's */ 498 c += (length<<3); 499 switch(len) /* all the case statements fall through */ 500 { 501 /* c is reserved for the length */ 502 case 2: b+=k[1]; 503 case 1: a+=k[0]; 504 /* case 0: nothing left to add */ 505 default: 506 } 507 mixin(mix64abc); 508 /*-------------------------------------------- report the result */ 509 return c; 510 } 511 512 debug(UnitTest){ 513 void alignIndependence(T)(ubyte[] val,T function (void*,T len,T oldHash)hashF){ 514 ubyte[] vval=new ubyte[val.length+16]; 515 T oldV=0xdeadbeef; 516 T hash=hashF(val.ptr,val.length,oldV); 517 for(int i=0;i<16;++i){ 518 vval[i..i+val.length]=val; 519 T hashAtt=hashF(vval.ptr+i,val.length,oldV); 520 assert(hashAtt==hash); 521 } 522 } 523 524 unittest{ 525 ubyte[] val=cast(ubyte[])"blaterare"; 526 alignIndependence!(uint)(val,&murmurHashNeutral2); 527 alignIndependence!(uint)(val,&murmurHashAligned2); 528 alignIndependence!(ulong)(val, 529 function ulong(void* str,ulong len,ulong oldV){ 530 return lookup8_hash(cast(ubyte*)str,len,oldV); 531 } 532 ); 533 } 534 }