1/*
2 * Taken from http://burtleburtle.net/bob/c/lookup3.c
3 */
4
5#include <sys/hash.h>
6#include <machine/endian.h>
7
8/*
9-------------------------------------------------------------------------------
10lookup3.c, by Bob Jenkins, May 2006, Public Domain.
11
12These are functions for producing 32-bit hashes for hash table lookup.
13hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
14are externally useful functions.  Routines to test the hash are included
15if SELF_TEST is defined.  You can use this free for any purpose.  It's in
16the public domain.  It has no warranty.
17
18You probably want to use hashlittle().  hashlittle() and hashbig()
19hash byte arrays.  hashlittle() is faster than hashbig() on
20little-endian machines.  Intel and AMD are little-endian machines.
21On second thought, you probably want hashlittle2(), which is identical to
22hashlittle() except it returns two 32-bit hashes for the price of one.
23You could implement hashbig2() if you wanted but I haven't bothered here.
24
25If you want to find a hash of, say, exactly 7 integers, do
26  a = i1;  b = i2;  c = i3;
27  mix(a,b,c);
28  a += i4; b += i5; c += i6;
29  mix(a,b,c);
30  a += i7;
31  final(a,b,c);
32then use c as the hash value.  If you have a variable length array of
334-byte integers to hash, use hashword().  If you have a byte array (like
34a character string), use hashlittle().  If you have several byte arrays, or
35a mix of things, see the comments above hashlittle().
36
37Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
38then mix those integers.  This is fast (you can do a lot more thorough
39mixing with 12*3 instructions on 3 integers than you can with 3 instructions
40on 1 byte), but shoehorning those bytes into integers efficiently is messy.
41-------------------------------------------------------------------------------
42*/
43
44#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
45
46/*
47-------------------------------------------------------------------------------
48mix -- mix 3 32-bit values reversibly.
49
50This is reversible, so any information in (a,b,c) before mix() is
51still in (a,b,c) after mix().
52
53If four pairs of (a,b,c) inputs are run through mix(), or through
54mix() in reverse, there are at least 32 bits of the output that
55are sometimes the same for one pair and different for another pair.
56This was tested for:
57* pairs that differed by one bit, by two bits, in any combination
58  of top bits of (a,b,c), or in any combination of bottom bits of
59  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
64* the base values were pseudorandom, all zero but one bit set, or
65  all zero plus a counter that starts at zero.
66
67Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
68satisfy this are
69    4  6  8 16 19  4
70    9 15  3 18 27 15
71   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits differing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
74used http://burtleburtle.net/bob/hash/avalanche.html to choose
75the operations, constants, and arrangements of the variables.
76
77This does not achieve avalanche.  There are input bits of (a,b,c)
78that fail to affect some output bits of (a,b,c), especially of a.  The
79most thoroughly mixed value is c, but it doesn't really even achieve
80avalanche in c.
81
82This allows some parallelism.  Read-after-writes are good at doubling
83the number of bits affected, so the goal of mixing pulls in the opposite
84direction as the goal of parallelism.  I did what I could.  Rotates
85seem to cost as much as shifts on every machine I could lay my hands
86on, and rotates are much kinder to the top and bottom bits, so I used
87rotates.
88-------------------------------------------------------------------------------
89*/
90#define mix(a,b,c) \
91{ \
92  a -= c;  a ^= rot(c, 4);  c += b; \
93  b -= a;  b ^= rot(a, 6);  a += c; \
94  c -= b;  c ^= rot(b, 8);  b += a; \
95  a -= c;  a ^= rot(c,16);  c += b; \
96  b -= a;  b ^= rot(a,19);  a += c; \
97  c -= b;  c ^= rot(b, 4);  b += a; \
98}
99
100/*
101-------------------------------------------------------------------------------
102final -- final mixing of 3 32-bit values (a,b,c) into c
103
104Pairs of (a,b,c) values differing in only a few bits will usually
105produce values of c that look totally different.  This was tested for
106* pairs that differed by one bit, by two bits, in any combination
107  of top bits of (a,b,c), or in any combination of bottom bits of
108  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
113* the base values were pseudorandom, all zero but one bit set, or
114  all zero plus a counter that starts at zero.
115
116These constants passed:
117 14 11 25 16 4 14 24
118 12 14 25 16 4 14 24
119and these came close:
120  4  8 15 26 3 22 24
121 10  8 15 26 3 22 24
122 11  8 15 26 3 22 24
123-------------------------------------------------------------------------------
124*/
125#define final(a,b,c) \
126{ \
127  c ^= b; c -= rot(b,14); \
128  a ^= c; a -= rot(c,11); \
129  b ^= a; b -= rot(a,25); \
130  c ^= b; c -= rot(b,16); \
131  a ^= c; a -= rot(c,4);  \
132  b ^= a; b -= rot(a,14); \
133  c ^= b; c -= rot(b,24); \
134}
135
136/*
137--------------------------------------------------------------------
138 This works on all machines.  To be useful, it requires
139 -- that the key be an array of uint32_t's, and
140 -- that the length be the number of uint32_t's in the key
141
142 The function hashword() is identical to hashlittle() on little-endian
143 machines, and identical to hashbig() on big-endian machines,
144 except that the length has to be measured in uint32_ts rather than in
145 bytes.  hashlittle() is more complicated than hashword() only because
146 hashlittle() has to dance around fitting the key bytes into registers.
147--------------------------------------------------------------------
148*/
149uint32_t jenkins_hash32(
150const uint32_t *k,                   /* the key, an array of uint32_t values */
151size_t          length,               /* the length of the key, in uint32_ts */
152uint32_t        initval)         /* the previous hash, or an arbitrary value */
153{
154  uint32_t a,b,c;
155
156  /* Set up the internal state */
157  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
158
159  /*------------------------------------------------- handle most of the key */
160  while (length > 3)
161  {
162    a += k[0];
163    b += k[1];
164    c += k[2];
165    mix(a,b,c);
166    length -= 3;
167    k += 3;
168  }
169
170  /*------------------------------------------- handle the last 3 uint32_t's */
171  switch(length)                     /* all the case statements fall through */
172  {
173  case 3 : c+=k[2];
174  case 2 : b+=k[1];
175  case 1 : a+=k[0];
176    final(a,b,c);
177  case 0:     /* case 0: nothing left to add */
178    break;
179  }
180  /*------------------------------------------------------ report the result */
181  return c;
182}
183
184#if BYTE_ORDER == LITTLE_ENDIAN
185/*
186-------------------------------------------------------------------------------
187hashlittle() -- hash a variable-length key into a 32-bit value
188  k       : the key (the unaligned variable-length array of bytes)
189  length  : the length of the key, counting by bytes
190  initval : can be any 4-byte value
191Returns a 32-bit value.  Every bit of the key affects every bit of
192the return value.  Two keys differing by one or two bits will have
193totally different hash values.
194
195The best hash table sizes are powers of 2.  There is no need to do
196mod a prime (mod is sooo slow!).  If you need less than 32 bits,
197use a bitmask.  For example, if you need only 10 bits, do
198  h = (h & hashmask(10));
199In which case, the hash table should have hashsize(10) elements.
200
201If you are hashing n strings (uint8_t **)k, do it like this:
202  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
203
204By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
205code any way you wish, private, educational, or commercial.  It's free.
206
207Use for hash table lookup, or anything where one collision in 2^^32 is
208acceptable.  Do NOT use for cryptographic purposes.
209-------------------------------------------------------------------------------
210*/
211
212uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
213{
214  uint32_t a,b,c;                                          /* internal state */
215  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
216
217  /* Set up the internal state */
218  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
219
220  u.ptr = key;
221  if ((u.i & 0x3) == 0) {
222    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
223
224    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
225    while (length > 12)
226    {
227      a += k[0];
228      b += k[1];
229      c += k[2];
230      mix(a,b,c);
231      length -= 12;
232      k += 3;
233    }
234
235    /*----------------------------- handle the last (probably partial) block */
236    /*
237     * "k[2]&0xffffff" actually reads beyond the end of the string, but
238     * then masks off the part it's not allowed to read.  Because the
239     * string is aligned, the masked-off tail is in the same word as the
240     * rest of the string.  Every machine with memory protection I've seen
241     * does it on word boundaries, so is OK with this.  But VALGRIND will
242     * still catch it and complain.  The masking trick does make the hash
243     * noticeably faster for short strings (like English words).
244     */
245
246    switch(length)
247    {
248    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
249    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
250    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
251    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
252    case 8 : b+=k[1]; a+=k[0]; break;
253    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
254    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
255    case 5 : b+=k[1]&0xff; a+=k[0]; break;
256    case 4 : a+=k[0]; break;
257    case 3 : a+=k[0]&0xffffff; break;
258    case 2 : a+=k[0]&0xffff; break;
259    case 1 : a+=k[0]&0xff; break;
260    case 0 : return c;              /* zero length strings require no mixing */
261    }
262
263  } else if ((u.i & 0x1) == 0) {
264    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
265    const uint8_t  *k8;
266
267    /*--------------- all but last block: aligned reads and different mixing */
268    while (length > 12)
269    {
270      a += k[0] + (((uint32_t)k[1])<<16);
271      b += k[2] + (((uint32_t)k[3])<<16);
272      c += k[4] + (((uint32_t)k[5])<<16);
273      mix(a,b,c);
274      length -= 12;
275      k += 6;
276    }
277
278    /*----------------------------- handle the last (probably partial) block */
279    k8 = (const uint8_t *)k;
280    switch(length)
281    {
282    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
283             b+=k[2]+(((uint32_t)k[3])<<16);
284             a+=k[0]+(((uint32_t)k[1])<<16);
285             break;
286    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
287    case 10: c+=k[4];
288             b+=k[2]+(((uint32_t)k[3])<<16);
289             a+=k[0]+(((uint32_t)k[1])<<16);
290             break;
291    case 9 : c+=k8[8];                      /* fall through */
292    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
293             a+=k[0]+(((uint32_t)k[1])<<16);
294             break;
295    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
296    case 6 : b+=k[2];
297             a+=k[0]+(((uint32_t)k[1])<<16);
298             break;
299    case 5 : b+=k8[4];                      /* fall through */
300    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
301             break;
302    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
303    case 2 : a+=k[0];
304             break;
305    case 1 : a+=k8[0];
306             break;
307    case 0 : return c;                     /* zero length requires no mixing */
308    }
309
310  } else {                        /* need to read the key one byte at a time */
311    const uint8_t *k = (const uint8_t *)key;
312
313    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
314    while (length > 12)
315    {
316      a += k[0];
317      a += ((uint32_t)k[1])<<8;
318      a += ((uint32_t)k[2])<<16;
319      a += ((uint32_t)k[3])<<24;
320      b += k[4];
321      b += ((uint32_t)k[5])<<8;
322      b += ((uint32_t)k[6])<<16;
323      b += ((uint32_t)k[7])<<24;
324      c += k[8];
325      c += ((uint32_t)k[9])<<8;
326      c += ((uint32_t)k[10])<<16;
327      c += ((uint32_t)k[11])<<24;
328      mix(a,b,c);
329      length -= 12;
330      k += 12;
331    }
332
333    /*-------------------------------- last block: affect all 32 bits of (c) */
334    switch(length)                   /* all the case statements fall through */
335    {
336    case 12: c+=((uint32_t)k[11])<<24;
337    case 11: c+=((uint32_t)k[10])<<16;
338    case 10: c+=((uint32_t)k[9])<<8;
339    case 9 : c+=k[8];
340    case 8 : b+=((uint32_t)k[7])<<24;
341    case 7 : b+=((uint32_t)k[6])<<16;
342    case 6 : b+=((uint32_t)k[5])<<8;
343    case 5 : b+=k[4];
344    case 4 : a+=((uint32_t)k[3])<<24;
345    case 3 : a+=((uint32_t)k[2])<<16;
346    case 2 : a+=((uint32_t)k[1])<<8;
347    case 1 : a+=k[0];
348             break;
349    case 0 : return c;
350    }
351  }
352
353  final(a,b,c);
354  return c;
355}
356
357#else /* !(BYTE_ORDER == LITTLE_ENDIAN) */
358
359/*
360 * hashbig():
361 * This is the same as hashword() on big-endian machines.  It is different
362 * from hashlittle() on all machines.  hashbig() takes advantage of
363 * big-endian byte ordering.
364 */
365uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
366{
367  uint32_t a,b,c;
368  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
369
370  /* Set up the internal state */
371  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
372
373  u.ptr = key;
374  if ((u.i & 0x3) == 0) {
375    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
376
377    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
378    while (length > 12)
379    {
380      a += k[0];
381      b += k[1];
382      c += k[2];
383      mix(a,b,c);
384      length -= 12;
385      k += 3;
386    }
387
388    /*----------------------------- handle the last (probably partial) block */
389    /*
390     * "k[2]<<8" actually reads beyond the end of the string, but
391     * then shifts out the part it's not allowed to read.  Because the
392     * string is aligned, the illegal read is in the same word as the
393     * rest of the string.  Every machine with memory protection I've seen
394     * does it on word boundaries, so is OK with this.  But VALGRIND will
395     * still catch it and complain.  The masking trick does make the hash
396     * noticeably faster for short strings (like English words).
397     */
398
399    switch(length)
400    {
401    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
402    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
403    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
404    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
405    case 8 : b+=k[1]; a+=k[0]; break;
406    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
407    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
408    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
409    case 4 : a+=k[0]; break;
410    case 3 : a+=k[0]&0xffffff00; break;
411    case 2 : a+=k[0]&0xffff0000; break;
412    case 1 : a+=k[0]&0xff000000; break;
413    case 0 : return c;              /* zero length strings require no mixing */
414    }
415
416  } else {                        /* need to read the key one byte at a time */
417    const uint8_t *k = (const uint8_t *)key;
418
419    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
420    while (length > 12)
421    {
422      a += ((uint32_t)k[0])<<24;
423      a += ((uint32_t)k[1])<<16;
424      a += ((uint32_t)k[2])<<8;
425      a += ((uint32_t)k[3]);
426      b += ((uint32_t)k[4])<<24;
427      b += ((uint32_t)k[5])<<16;
428      b += ((uint32_t)k[6])<<8;
429      b += ((uint32_t)k[7]);
430      c += ((uint32_t)k[8])<<24;
431      c += ((uint32_t)k[9])<<16;
432      c += ((uint32_t)k[10])<<8;
433      c += ((uint32_t)k[11]);
434      mix(a,b,c);
435      length -= 12;
436      k += 12;
437    }
438
439    /*-------------------------------- last block: affect all 32 bits of (c) */
440    switch(length)                   /* all the case statements fall through */
441    {
442    case 12: c+=k[11];
443    case 11: c+=((uint32_t)k[10])<<8;
444    case 10: c+=((uint32_t)k[9])<<16;
445    case 9 : c+=((uint32_t)k[8])<<24;
446    case 8 : b+=k[7];
447    case 7 : b+=((uint32_t)k[6])<<8;
448    case 6 : b+=((uint32_t)k[5])<<16;
449    case 5 : b+=((uint32_t)k[4])<<24;
450    case 4 : a+=k[3];
451    case 3 : a+=((uint32_t)k[2])<<8;
452    case 2 : a+=((uint32_t)k[1])<<16;
453    case 1 : a+=((uint32_t)k[0])<<24;
454             break;
455    case 0 : return c;
456    }
457  }
458
459  final(a,b,c);
460  return c;
461}
462#endif
463