jenkins_hash.c revision 193854
1#ifndef __LIBKERN_JENKINS_H__
2#define __LIBKERN_JENKINS_H__
3/*
4 * Taken from http://burtleburtle.net/bob/c/lookup3.c
5 * $FreeBSD: head/sys/libkern/jenkins.h 193854 2009-06-09 20:21:40Z kmacy $
6 */
7
8#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
9
10/*
11-------------------------------------------------------------------------------
12mix -- mix 3 32-bit values reversibly.
13
14This is reversible, so any information in (a,b,c) before mix() is
15still in (a,b,c) after mix().
16
17If four pairs of (a,b,c) inputs are run through mix(), or through
18mix() in reverse, there are at least 32 bits of the output that
19are sometimes the same for one pair and different for another pair.
20This was tested for:
21* pairs that differed by one bit, by two bits, in any combination
22  of top bits of (a,b,c), or in any combination of bottom bits of
23  (a,b,c).
24* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
25  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
26  is commonly produced by subtraction) look like a single 1-bit
27  difference.
28* the base values were pseudorandom, all zero but one bit set, or
29  all zero plus a counter that starts at zero.
30
31Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
32satisfy this are
33    4  6  8 16 19  4
34    9 15  3 18 27 15
35   14  9  3  7 17  3
36Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
37for "differ" defined as + with a one-bit base and a two-bit delta.  I
38used http://burtleburtle.net/bob/hash/avalanche.html to choose
39the operations, constants, and arrangements of the variables.
40
41This does not achieve avalanche.  There are input bits of (a,b,c)
42that fail to affect some output bits of (a,b,c), especially of a.  The
43most thoroughly mixed value is c, but it doesn't really even achieve
44avalanche in c.
45
46This allows some parallelism.  Read-after-writes are good at doubling
47the number of bits affected, so the goal of mixing pulls in the opposite
48direction as the goal of parallelism.  I did what I could.  Rotates
49seem to cost as much as shifts on every machine I could lay my hands
50on, and rotates are much kinder to the top and bottom bits, so I used
51rotates.
52-------------------------------------------------------------------------------
53*/
54#define mix(a,b,c) \
55{ \
56  a -= c;  a ^= rot(c, 4);  c += b; \
57  b -= a;  b ^= rot(a, 6);  a += c; \
58  c -= b;  c ^= rot(b, 8);  b += a; \
59  a -= c;  a ^= rot(c,16);  c += b; \
60  b -= a;  b ^= rot(a,19);  a += c; \
61  c -= b;  c ^= rot(b, 4);  b += a; \
62}
63
64/*
65-------------------------------------------------------------------------------
66final -- final mixing of 3 32-bit values (a,b,c) into c
67
68Pairs of (a,b,c) values differing in only a few bits will usually
69produce values of c that look totally different.  This was tested for
70* pairs that differed by one bit, by two bits, in any combination
71  of top bits of (a,b,c), or in any combination of bottom bits of
72  (a,b,c).
73* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
74  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
75  is commonly produced by subtraction) look like a single 1-bit
76  difference.
77* the base values were pseudorandom, all zero but one bit set, or
78  all zero plus a counter that starts at zero.
79
80These constants passed:
81 14 11 25 16 4 14 24
82 12 14 25 16 4 14 24
83and these came close:
84  4  8 15 26 3 22 24
85 10  8 15 26 3 22 24
86 11  8 15 26 3 22 24
87-------------------------------------------------------------------------------
88*/
89#define final(a,b,c) \
90{ \
91  c ^= b; c -= rot(b,14); \
92  a ^= c; a -= rot(c,11); \
93  b ^= a; b -= rot(a,25); \
94  c ^= b; c -= rot(b,16); \
95  a ^= c; a -= rot(c,4);  \
96  b ^= a; b -= rot(a,14); \
97  c ^= b; c -= rot(b,24); \
98}
99
100/*
101--------------------------------------------------------------------
102 This works on all machines.  To be useful, it requires
103 -- that the key be an array of uint32_t's, and
104 -- that the length be the number of uint32_t's in the key
105
106 The function hashword() is identical to hashlittle() on little-endian
107 machines, and identical to hashbig() on big-endian machines,
108 except that the length has to be measured in uint32_ts rather than in
109 bytes.  hashlittle() is more complicated than hashword() only because
110 hashlittle() has to dance around fitting the key bytes into registers.
111--------------------------------------------------------------------
112*/
113static uint32_t
114jenkins_hashword(
115                const uint32_t *k,  /* the key, an array of uint32_t values */
116                size_t length,      /* the length of the key, in uint32_ts */
117                uint32_t initval    /* the previous hash, or an arbitrary value */
118)
119{
120  uint32_t a,b,c;
121
122  /* Set up the internal state */
123  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
124
125  /*------------------------------------------------- handle most of the key */
126  while (length > 3)
127  {
128    a += k[0];
129    b += k[1];
130    c += k[2];
131    mix(a,b,c);
132    length -= 3;
133    k += 3;
134  }
135
136  /*------------------------------------------- handle the last 3 uint32_t's */
137  switch(length)                     /* all the case statements fall through */
138  {
139  case 3 : c+=k[2];
140  case 2 : b+=k[1];
141  case 1 : a+=k[0];
142    final(a,b,c);
143  case 0:     /* case 0: nothing left to add */
144    break;
145  }
146  /*------------------------------------------------------ report the result */
147  return c;
148}
149#endif
150