1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr_private.h"
18
19#include "apr_general.h"
20#include "apr_pools.h"
21#include "apr_time.h"
22
23#include "apr_hash.h"
24
25#if APR_HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28#if APR_HAVE_STRING_H
29#include <string.h>
30#endif
31
32#if APR_POOL_DEBUG && APR_HAVE_STDIO_H
33#include <stdio.h>
34#endif
35
36/*
37 * The internal form of a hash table.
38 *
39 * The table is an array indexed by the hash of the key; collisions
40 * are resolved by hanging a linked list of hash entries off each
41 * element of the array. Although this is a really simple design it
42 * isn't too bad given that pools have a low allocation overhead.
43 */
44
45typedef struct apr_hash_entry_t apr_hash_entry_t;
46
47struct apr_hash_entry_t {
48    apr_hash_entry_t *next;
49    unsigned int      hash;
50    const void       *key;
51    apr_ssize_t       klen;
52    const void       *val;
53};
54
55/*
56 * Data structure for iterating through a hash table.
57 *
58 * We keep a pointer to the next hash entry here to allow the current
59 * hash entry to be freed or otherwise mangled between calls to
60 * apr_hash_next().
61 */
62struct apr_hash_index_t {
63    apr_hash_t         *ht;
64    apr_hash_entry_t   *this, *next;
65    unsigned int        index;
66};
67
68/*
69 * The size of the array is always a power of two. We use the maximum
70 * index rather than the size so that we can use bitwise-AND for
71 * modular arithmetic.
72 * The count of hash entries may be greater depending on the chosen
73 * collision rate.
74 */
75struct apr_hash_t {
76    apr_pool_t          *pool;
77    apr_hash_entry_t   **array;
78    apr_hash_index_t     iterator;  /* For apr_hash_first(NULL, ...) */
79    unsigned int         count, max, seed;
80    apr_hashfunc_t       hash_func;
81    apr_hash_entry_t    *free;  /* List of recycled entries */
82};
83
84#define INITIAL_MAX 15 /* tunable == 2^n - 1 */
85
86
87/*
88 * Hash creation functions.
89 */
90
91static apr_hash_entry_t **alloc_array(apr_hash_t *ht, unsigned int max)
92{
93   return apr_pcalloc(ht->pool, sizeof(*ht->array) * (max + 1));
94}
95
96APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool)
97{
98    apr_hash_t *ht;
99    apr_time_t now = apr_time_now();
100
101    ht = apr_palloc(pool, sizeof(apr_hash_t));
102    ht->pool = pool;
103    ht->free = NULL;
104    ht->count = 0;
105    ht->max = INITIAL_MAX;
106    ht->seed = (unsigned int)((now >> 32) ^ now ^ (apr_uintptr_t)pool ^
107                              (apr_uintptr_t)ht ^ (apr_uintptr_t)&now) - 1;
108    ht->array = alloc_array(ht, ht->max);
109    ht->hash_func = NULL;
110
111    return ht;
112}
113
114APR_DECLARE(apr_hash_t *) apr_hash_make_custom(apr_pool_t *pool,
115                                               apr_hashfunc_t hash_func)
116{
117    apr_hash_t *ht = apr_hash_make(pool);
118    ht->hash_func = hash_func;
119    return ht;
120}
121
122
123/*
124 * Hash iteration functions.
125 */
126
127APR_DECLARE(apr_hash_index_t *) apr_hash_next(apr_hash_index_t *hi)
128{
129    hi->this = hi->next;
130    while (!hi->this) {
131        if (hi->index > hi->ht->max)
132            return NULL;
133
134        hi->this = hi->ht->array[hi->index++];
135    }
136    hi->next = hi->this->next;
137    return hi;
138}
139
140APR_DECLARE(apr_hash_index_t *) apr_hash_first(apr_pool_t *p, apr_hash_t *ht)
141{
142    apr_hash_index_t *hi;
143    if (p)
144        hi = apr_palloc(p, sizeof(*hi));
145    else
146        hi = &ht->iterator;
147
148    hi->ht = ht;
149    hi->index = 0;
150    hi->this = NULL;
151    hi->next = NULL;
152    return apr_hash_next(hi);
153}
154
155APR_DECLARE(void) apr_hash_this(apr_hash_index_t *hi,
156                                const void **key,
157                                apr_ssize_t *klen,
158                                void **val)
159{
160    if (key)  *key  = hi->this->key;
161    if (klen) *klen = hi->this->klen;
162    if (val)  *val  = (void *)hi->this->val;
163}
164
165
166/*
167 * Expanding a hash table
168 */
169
170static void expand_array(apr_hash_t *ht)
171{
172    apr_hash_index_t *hi;
173    apr_hash_entry_t **new_array;
174    unsigned int new_max;
175
176    new_max = ht->max * 2 + 1;
177    new_array = alloc_array(ht, new_max);
178    for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi)) {
179        unsigned int i = hi->this->hash & new_max;
180        hi->this->next = new_array[i];
181        new_array[i] = hi->this;
182    }
183    ht->array = new_array;
184    ht->max = new_max;
185}
186
187static unsigned int hashfunc_default(const char *char_key, apr_ssize_t *klen,
188                                     unsigned int hash)
189{
190    const unsigned char *key = (const unsigned char *)char_key;
191    const unsigned char *p;
192    apr_ssize_t i;
193
194    /*
195     * This is the popular `times 33' hash algorithm which is used by
196     * perl and also appears in Berkeley DB. This is one of the best
197     * known hash functions for strings because it is both computed
198     * very fast and distributes very well.
199     *
200     * The originator may be Dan Bernstein but the code in Berkeley DB
201     * cites Chris Torek as the source. The best citation I have found
202     * is "Chris Torek, Hash function for text in C, Usenet message
203     * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
204     * Salz's USENIX 1992 paper about INN which can be found at
205     * <http://citeseer.nj.nec.com/salz92internetnews.html>.
206     *
207     * The magic of number 33, i.e. why it works better than many other
208     * constants, prime or not, has never been adequately explained by
209     * anyone. So I try an explanation: if one experimentally tests all
210     * multipliers between 1 and 256 (as I did while writing a low-level
211     * data structure library some time ago) one detects that even
212     * numbers are not useable at all. The remaining 128 odd numbers
213     * (except for the number 1) work more or less all equally well.
214     * They all distribute in an acceptable way and this way fill a hash
215     * table with an average percent of approx. 86%.
216     *
217     * If one compares the chi^2 values of the variants (see
218     * Bob Jenkins ``Hashing Frequently Asked Questions'' at
219     * http://burtleburtle.net/bob/hash/hashfaq.html for a description
220     * of chi^2), the number 33 not even has the best value. But the
221     * number 33 and a few other equally good numbers like 17, 31, 63,
222     * 127 and 129 have nevertheless a great advantage to the remaining
223     * numbers in the large set of possible multipliers: their multiply
224     * operation can be replaced by a faster operation based on just one
225     * shift plus either a single addition or subtraction operation. And
226     * because a hash function has to both distribute good _and_ has to
227     * be very fast to compute, those few numbers should be preferred.
228     *
229     *                  -- Ralf S. Engelschall <rse@engelschall.com>
230     */
231
232    if (*klen == APR_HASH_KEY_STRING) {
233        for (p = key; *p; p++) {
234            hash = hash * 33 + *p;
235        }
236        *klen = p - key;
237    }
238    else {
239        for (p = key, i = *klen; i; i--, p++) {
240            hash = hash * 33 + *p;
241        }
242    }
243
244    return hash;
245}
246
247APR_DECLARE_NONSTD(unsigned int) apr_hashfunc_default(const char *char_key,
248                                                      apr_ssize_t *klen)
249{
250    return hashfunc_default(char_key, klen, 0);
251}
252
253/*
254 * This is where we keep the details of the hash function and control
255 * the maximum collision rate.
256 *
257 * If val is non-NULL it creates and initializes a new hash entry if
258 * there isn't already one there; it returns an updatable pointer so
259 * that hash entries can be removed.
260 */
261
262static apr_hash_entry_t **find_entry(apr_hash_t *ht,
263                                     const void *key,
264                                     apr_ssize_t klen,
265                                     const void *val)
266{
267    apr_hash_entry_t **hep, *he;
268    unsigned int hash;
269
270    if (ht->hash_func)
271        hash = ht->hash_func(key, &klen);
272    else
273        hash = hashfunc_default(key, &klen, ht->seed);
274
275    /* scan linked list */
276    for (hep = &ht->array[hash & ht->max], he = *hep;
277         he; hep = &he->next, he = *hep) {
278        if (he->hash == hash
279            && he->klen == klen
280            && memcmp(he->key, key, klen) == 0)
281            break;
282    }
283    if (he || !val)
284        return hep;
285
286    /* add a new entry for non-NULL values */
287    if ((he = ht->free) != NULL)
288        ht->free = he->next;
289    else
290        he = apr_palloc(ht->pool, sizeof(*he));
291    he->next = NULL;
292    he->hash = hash;
293    he->key  = key;
294    he->klen = klen;
295    he->val  = val;
296    *hep = he;
297    ht->count++;
298    return hep;
299}
300
301APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
302                                        const apr_hash_t *orig)
303{
304    apr_hash_t *ht;
305    apr_hash_entry_t *new_vals;
306    unsigned int i, j;
307
308    ht = apr_palloc(pool, sizeof(apr_hash_t) +
309                    sizeof(*ht->array) * (orig->max + 1) +
310                    sizeof(apr_hash_entry_t) * orig->count);
311    ht->pool = pool;
312    ht->free = NULL;
313    ht->count = orig->count;
314    ht->max = orig->max;
315    ht->seed = orig->seed;
316    ht->hash_func = orig->hash_func;
317    ht->array = (apr_hash_entry_t **)((char *)ht + sizeof(apr_hash_t));
318
319    new_vals = (apr_hash_entry_t *)((char *)(ht) + sizeof(apr_hash_t) +
320                                    sizeof(*ht->array) * (orig->max + 1));
321    j = 0;
322    for (i = 0; i <= ht->max; i++) {
323        apr_hash_entry_t **new_entry = &(ht->array[i]);
324        apr_hash_entry_t *orig_entry = orig->array[i];
325        while (orig_entry) {
326            *new_entry = &new_vals[j++];
327            (*new_entry)->hash = orig_entry->hash;
328            (*new_entry)->key = orig_entry->key;
329            (*new_entry)->klen = orig_entry->klen;
330            (*new_entry)->val = orig_entry->val;
331            new_entry = &((*new_entry)->next);
332            orig_entry = orig_entry->next;
333        }
334        *new_entry = NULL;
335    }
336    return ht;
337}
338
339APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
340                                 const void *key,
341                                 apr_ssize_t klen)
342{
343    apr_hash_entry_t *he;
344    he = *find_entry(ht, key, klen, NULL);
345    if (he)
346        return (void *)he->val;
347    else
348        return NULL;
349}
350
351APR_DECLARE(void) apr_hash_set(apr_hash_t *ht,
352                               const void *key,
353                               apr_ssize_t klen,
354                               const void *val)
355{
356    apr_hash_entry_t **hep;
357    hep = find_entry(ht, key, klen, val);
358    if (*hep) {
359        if (!val) {
360            /* delete entry */
361            apr_hash_entry_t *old = *hep;
362            *hep = (*hep)->next;
363            old->next = ht->free;
364            ht->free = old;
365            --ht->count;
366        }
367        else {
368            /* replace entry */
369            (*hep)->val = val;
370            /* check that the collision rate isn't too high */
371            if (ht->count > ht->max) {
372                expand_array(ht);
373            }
374        }
375    }
376    /* else key not present and val==NULL */
377}
378
379APR_DECLARE(unsigned int) apr_hash_count(apr_hash_t *ht)
380{
381    return ht->count;
382}
383
384APR_DECLARE(void) apr_hash_clear(apr_hash_t *ht)
385{
386    apr_hash_index_t *hi;
387    for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi))
388        apr_hash_set(ht, hi->this->key, hi->this->klen, NULL);
389}
390
391APR_DECLARE(apr_hash_t*) apr_hash_overlay(apr_pool_t *p,
392                                          const apr_hash_t *overlay,
393                                          const apr_hash_t *base)
394{
395    return apr_hash_merge(p, overlay, base, NULL, NULL);
396}
397
398APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
399                                         const apr_hash_t *overlay,
400                                         const apr_hash_t *base,
401                                         void * (*merger)(apr_pool_t *p,
402                                                     const void *key,
403                                                     apr_ssize_t klen,
404                                                     const void *h1_val,
405                                                     const void *h2_val,
406                                                     const void *data),
407                                         const void *data)
408{
409    apr_hash_t *res;
410    apr_hash_entry_t *new_vals = NULL;
411    apr_hash_entry_t *iter;
412    apr_hash_entry_t *ent;
413    unsigned int i, j, k, hash;
414
415#if APR_POOL_DEBUG
416    /* we don't copy keys and values, so it's necessary that
417     * overlay->a.pool and base->a.pool have a life span at least
418     * as long as p
419     */
420    if (!apr_pool_is_ancestor(overlay->pool, p)) {
421        fprintf(stderr,
422                "apr_hash_merge: overlay's pool is not an ancestor of p\n");
423        abort();
424    }
425    if (!apr_pool_is_ancestor(base->pool, p)) {
426        fprintf(stderr,
427                "apr_hash_merge: base's pool is not an ancestor of p\n");
428        abort();
429    }
430#endif
431
432    res = apr_palloc(p, sizeof(apr_hash_t));
433    res->pool = p;
434    res->free = NULL;
435    res->hash_func = base->hash_func;
436    res->count = base->count;
437    res->max = (overlay->max > base->max) ? overlay->max : base->max;
438    if (base->count + overlay->count > res->max) {
439        res->max = res->max * 2 + 1;
440    }
441    res->seed = base->seed;
442    res->array = alloc_array(res, res->max);
443    if (base->count + overlay->count) {
444        new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
445                              (base->count + overlay->count));
446    }
447    j = 0;
448    for (k = 0; k <= base->max; k++) {
449        for (iter = base->array[k]; iter; iter = iter->next) {
450            i = iter->hash & res->max;
451            new_vals[j].klen = iter->klen;
452            new_vals[j].key = iter->key;
453            new_vals[j].val = iter->val;
454            new_vals[j].hash = iter->hash;
455            new_vals[j].next = res->array[i];
456            res->array[i] = &new_vals[j];
457            j++;
458        }
459    }
460
461    for (k = 0; k <= overlay->max; k++) {
462        for (iter = overlay->array[k]; iter; iter = iter->next) {
463            if (res->hash_func)
464                hash = res->hash_func(iter->key, &iter->klen);
465            else
466                hash = hashfunc_default(iter->key, &iter->klen, res->seed);
467            i = hash & res->max;
468            for (ent = res->array[i]; ent; ent = ent->next) {
469                if ((ent->klen == iter->klen) &&
470                    (memcmp(ent->key, iter->key, iter->klen) == 0)) {
471                    if (merger) {
472                        ent->val = (*merger)(p, iter->key, iter->klen,
473                                             iter->val, ent->val, data);
474                    }
475                    else {
476                        ent->val = iter->val;
477                    }
478                    break;
479                }
480            }
481            if (!ent) {
482                new_vals[j].klen = iter->klen;
483                new_vals[j].key = iter->key;
484                new_vals[j].val = iter->val;
485                new_vals[j].hash = hash;
486                new_vals[j].next = res->array[i];
487                res->array[i] = &new_vals[j];
488                res->count++;
489                j++;
490            }
491        }
492    }
493    return res;
494}
495
496/* This is basically the following...
497 * for every element in hash table {
498 *    comp elemeny.key, element.value
499 * }
500 *
501 * Like with apr_table_do, the comp callback is called for each and every
502 * element of the hash table.
503 */
504APR_DECLARE(int) apr_hash_do(apr_hash_do_callback_fn_t *comp,
505                             void *rec, const apr_hash_t *ht)
506{
507    apr_hash_index_t  hix;
508    apr_hash_index_t *hi;
509    int rv, dorv  = 1;
510
511    hix.ht    = (apr_hash_t *)ht;
512    hix.index = 0;
513    hix.this  = NULL;
514    hix.next  = NULL;
515
516    if ((hi = apr_hash_next(&hix))) {
517        /* Scan the entire table */
518        do {
519            rv = (*comp)(rec, hi->this->key, hi->this->klen, hi->this->val);
520        } while (rv && (hi = apr_hash_next(hi)));
521
522        if (rv == 0) {
523            dorv = 0;
524        }
525    }
526    return dorv;
527}
528
529APR_POOL_IMPLEMENT_ACCESSOR(hash)
530