jenkins_hash.c revision 193854
1#ifndef __LIBKERN_JENKINS_H__ 2#define __LIBKERN_JENKINS_H__ 3/* 4 * Taken from http://burtleburtle.net/bob/c/lookup3.c 5 * $FreeBSD: head/sys/libkern/jenkins.h 193854 2009-06-09 20:21:40Z kmacy $ 6 */ 7 8#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 9 10/* 11------------------------------------------------------------------------------- 12mix -- mix 3 32-bit values reversibly. 13 14This is reversible, so any information in (a,b,c) before mix() is 15still in (a,b,c) after mix(). 16 17If four pairs of (a,b,c) inputs are run through mix(), or through 18mix() in reverse, there are at least 32 bits of the output that 19are sometimes the same for one pair and different for another pair. 20This was tested for: 21* pairs that differed by one bit, by two bits, in any combination 22 of top bits of (a,b,c), or in any combination of bottom bits of 23 (a,b,c). 24* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 25 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 26 is commonly produced by subtraction) look like a single 1-bit 27 difference. 28* the base values were pseudorandom, all zero but one bit set, or 29 all zero plus a counter that starts at zero. 30 31Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 32satisfy this are 33 4 6 8 16 19 4 34 9 15 3 18 27 15 35 14 9 3 7 17 3 36Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 37for "differ" defined as + with a one-bit base and a two-bit delta. I 38used http://burtleburtle.net/bob/hash/avalanche.html to choose 39the operations, constants, and arrangements of the variables. 40 41This does not achieve avalanche. There are input bits of (a,b,c) 42that fail to affect some output bits of (a,b,c), especially of a. The 43most thoroughly mixed value is c, but it doesn't really even achieve 44avalanche in c. 45 46This allows some parallelism. Read-after-writes are good at doubling 47the number of bits affected, so the goal of mixing pulls in the opposite 48direction as the goal of parallelism. I did what I could. Rotates 49seem to cost as much as shifts on every machine I could lay my hands 50on, and rotates are much kinder to the top and bottom bits, so I used 51rotates. 52------------------------------------------------------------------------------- 53*/ 54#define mix(a,b,c) \ 55{ \ 56 a -= c; a ^= rot(c, 4); c += b; \ 57 b -= a; b ^= rot(a, 6); a += c; \ 58 c -= b; c ^= rot(b, 8); b += a; \ 59 a -= c; a ^= rot(c,16); c += b; \ 60 b -= a; b ^= rot(a,19); a += c; \ 61 c -= b; c ^= rot(b, 4); b += a; \ 62} 63 64/* 65------------------------------------------------------------------------------- 66final -- final mixing of 3 32-bit values (a,b,c) into c 67 68Pairs of (a,b,c) values differing in only a few bits will usually 69produce values of c that look totally different. This was tested for 70* pairs that differed by one bit, by two bits, in any combination 71 of top bits of (a,b,c), or in any combination of bottom bits of 72 (a,b,c). 73* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 74 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 75 is commonly produced by subtraction) look like a single 1-bit 76 difference. 77* the base values were pseudorandom, all zero but one bit set, or 78 all zero plus a counter that starts at zero. 79 80These constants passed: 81 14 11 25 16 4 14 24 82 12 14 25 16 4 14 24 83and these came close: 84 4 8 15 26 3 22 24 85 10 8 15 26 3 22 24 86 11 8 15 26 3 22 24 87------------------------------------------------------------------------------- 88*/ 89#define final(a,b,c) \ 90{ \ 91 c ^= b; c -= rot(b,14); \ 92 a ^= c; a -= rot(c,11); \ 93 b ^= a; b -= rot(a,25); \ 94 c ^= b; c -= rot(b,16); \ 95 a ^= c; a -= rot(c,4); \ 96 b ^= a; b -= rot(a,14); \ 97 c ^= b; c -= rot(b,24); \ 98} 99 100/* 101-------------------------------------------------------------------- 102 This works on all machines. To be useful, it requires 103 -- that the key be an array of uint32_t's, and 104 -- that the length be the number of uint32_t's in the key 105 106 The function hashword() is identical to hashlittle() on little-endian 107 machines, and identical to hashbig() on big-endian machines, 108 except that the length has to be measured in uint32_ts rather than in 109 bytes. hashlittle() is more complicated than hashword() only because 110 hashlittle() has to dance around fitting the key bytes into registers. 111-------------------------------------------------------------------- 112*/ 113static uint32_t 114jenkins_hashword( 115 const uint32_t *k, /* the key, an array of uint32_t values */ 116 size_t length, /* the length of the key, in uint32_ts */ 117 uint32_t initval /* the previous hash, or an arbitrary value */ 118) 119{ 120 uint32_t a,b,c; 121 122 /* Set up the internal state */ 123 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; 124 125 /*------------------------------------------------- handle most of the key */ 126 while (length > 3) 127 { 128 a += k[0]; 129 b += k[1]; 130 c += k[2]; 131 mix(a,b,c); 132 length -= 3; 133 k += 3; 134 } 135 136 /*------------------------------------------- handle the last 3 uint32_t's */ 137 switch(length) /* all the case statements fall through */ 138 { 139 case 3 : c+=k[2]; 140 case 2 : b+=k[1]; 141 case 1 : a+=k[0]; 142 final(a,b,c); 143 case 0: /* case 0: nothing left to add */ 144 break; 145 } 146 /*------------------------------------------------------ report the result */ 147 return c; 148} 149#endif 150