1/*
2 * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#include <string.h>
29#include <sys/types.h>
30
31#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
32#include <AssertMacros.h>
33
34#include "kxld_dict.h"
35#include "kxld_util.h"
36
37/*******************************************************************************
38* Types and macros
39*******************************************************************************/
40
41/* Ratio of num_entries:num_buckets that will cause a resize */
42#define RESIZE_NUMER 7
43#define RESIZE_DENOM 10
44#define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM)
45#define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER)
46
47/* Selected for good scaling qualities when resizing dictionary
48 * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm
49 */
50#define DEFAULT_DICT_SIZE 89
51
52typedef struct dict_entry DictEntry;
53
54typedef enum {
55    EMPTY = 0,
56    USED = 1,
57    DELETED = 2
58} DictEntryState;
59
60struct dict_entry {
61    const void *key;
62    void *value;
63    DictEntryState state;
64};
65
66/*******************************************************************************
67* Function prototypes
68*******************************************************************************/
69
70static kern_return_t get_locate_index(const KXLDDict *dict, const void *key,
71    u_int *idx);
72static kern_return_t get_insert_index(const KXLDDict *dict, const void *key,
73    u_int *idx);
74static kern_return_t resize_dict(KXLDDict *dict);
75
76/*******************************************************************************
77*******************************************************************************/
78kern_return_t
79kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp,
80    u_int num_entries)
81{
82    kern_return_t rval = KERN_FAILURE;
83    u_int min_buckets = MIN_BUCKETS(num_entries);
84    u_int num_buckets = DEFAULT_DICT_SIZE;
85
86    check(dict);
87    check(hash);
88    check(cmp);
89
90    /* We want the number of allocated buckets to be at least twice that of the
91     * number to be inserted.
92     */
93    while (min_buckets > num_buckets) {
94        num_buckets *= 2;
95        num_buckets++;
96    }
97
98    /* Allocate enough buckets for the anticipated number of entries */
99    rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets);
100    require_noerr(rval, finish);
101
102    /* Initialize */
103    dict->hash = hash;
104    dict->cmp = cmp;
105    dict->num_entries = 0;
106    dict->resize_threshold = RESIZE_THRESHOLD(num_buckets);
107
108    rval = KERN_SUCCESS;
109
110finish:
111    return rval;
112}
113
114/*******************************************************************************
115*******************************************************************************/
116void
117kxld_dict_clear(KXLDDict *dict)
118{
119    check(dict);
120
121    dict->hash = NULL;
122    dict->cmp = NULL;
123    dict->num_entries = 0;
124    dict->resize_threshold = 0;
125    kxld_array_clear(&dict->buckets);
126    kxld_array_clear(&dict->resize_buckets);
127}
128
129/*******************************************************************************
130*******************************************************************************/
131void
132kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict)
133{
134    check(iter);
135    check(dict);
136
137    iter->idx = 0;
138    iter->dict = dict;
139}
140
141/*******************************************************************************
142*******************************************************************************/
143void
144kxld_dict_deinit(KXLDDict *dict)
145{
146    check(dict);
147
148    kxld_array_deinit(&dict->buckets);
149    kxld_array_deinit(&dict->resize_buckets);
150}
151
152/*******************************************************************************
153*******************************************************************************/
154u_int
155kxld_dict_get_num_entries(const KXLDDict *dict)
156{
157    check(dict);
158
159    return dict->num_entries;
160}
161
162/*******************************************************************************
163*******************************************************************************/
164void *
165kxld_dict_find(const KXLDDict *dict, const void *key)
166{
167    kern_return_t rval = KERN_FAILURE;
168    DictEntry *entry = NULL;
169    u_int idx = 0;
170
171    check(dict);
172    check(key);
173
174    rval = get_locate_index(dict, key, &idx);
175    if (rval) return NULL;
176
177    entry = kxld_array_get_item(&dict->buckets, idx);
178
179    return entry->value;
180}
181
182/*******************************************************************************
183* This dictionary uses linear probing, which means that when there is a
184* collision, we just walk along the buckets until a free bucket shows up.
185* A consequence of this is that when looking up an item, items that lie between
186* its hash value and its actual bucket may have been deleted since it was
187* inserted.  Thus, we should only stop a lookup when we've wrapped around the
188* dictionary or encountered an EMPTY bucket.
189********************************************************************************/
190static kern_return_t
191get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx)
192{
193    kern_return_t rval = KERN_FAILURE;
194    DictEntry *entry = NULL;
195    u_int base, idx;
196
197    base = idx = dict->hash(dict, key);
198
199    /* Iterate until we match the key, wrap, or hit an empty bucket */
200    entry = kxld_array_get_item(&dict->buckets, idx);
201    while (!dict->cmp(entry->key, key)) {
202        if (entry->state == EMPTY) goto finish;
203
204        idx = (idx + 1) % dict->buckets.nitems;
205        if (idx == base) goto finish;
206
207        entry = kxld_array_get_item(&dict->buckets, idx);
208    }
209
210    check(idx < dict->buckets.nitems);
211
212    *_idx = idx;
213    rval = KERN_SUCCESS;
214
215finish:
216    return rval;
217}
218
219/*******************************************************************************
220*******************************************************************************/
221kern_return_t
222kxld_dict_insert(KXLDDict *dict, const void *key, void *value)
223{
224    kern_return_t rval = KERN_FAILURE;
225    DictEntry *entry = NULL;
226    u_int idx = 0;
227
228    check(dict);
229    check(key);
230    check(value);
231
232    /* Resize if we are greater than the capacity threshold.
233     * Note: this is expensive, but the dictionary can be sized correctly at
234     * construction to avoid ever having to do this.
235     */
236    while (dict->num_entries > dict->resize_threshold) {
237        rval = resize_dict(dict);
238        require_noerr(rval, finish);
239    }
240
241    /* If this function returns FULL after we've already resized appropriately
242     * something is very wrong and we should return an error.
243     */
244    rval = get_insert_index(dict, key, &idx);
245    require_noerr(rval, finish);
246
247    /* Insert the new key-value pair into the bucket, but only count it as a
248     * new entry if we are not overwriting an existing entry.
249     */
250    entry = kxld_array_get_item(&dict->buckets, idx);
251    if (entry->state != USED) {
252        dict->num_entries++;
253        entry->key = key;
254        entry->state = USED;
255    }
256    entry->value = value;
257
258    rval = KERN_SUCCESS;
259
260finish:
261    return rval;
262}
263
264/*******************************************************************************
265* Increases the hash table's capacity by 2N+1.  Uses dictionary API.  Not
266* fast; just correct.
267*******************************************************************************/
268static kern_return_t
269resize_dict(KXLDDict *dict)
270{
271    kern_return_t rval = KERN_FAILURE;
272    KXLDArray tmparray;
273    DictEntry *entry = NULL;
274    u_int nbuckets = (dict->buckets.nitems * 2 + 1);
275    u_int i = 0;
276
277    check(dict);
278
279    /* Initialize a new set of buckets to hold more entries */
280    rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets);
281    require_noerr(rval, finish);
282
283    /* Swap the new buckets with the old buckets */
284    tmparray = dict->buckets;
285    dict->buckets = dict->resize_buckets;
286    dict->resize_buckets = tmparray;
287
288    /* Reset dictionary parameters */
289    dict->num_entries = 0;
290    dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems);
291
292    /* Rehash all of the entries */
293    for (i = 0; i < dict->resize_buckets.nitems; ++i) {
294        entry = kxld_array_get_item(&dict->resize_buckets, i);
295        if (entry->state == USED) {
296            rval = kxld_dict_insert(dict, entry->key, entry->value);
297            require_noerr(rval, finish);
298        }
299    }
300
301    /* Clear the old buckets */
302    kxld_array_clear(&dict->resize_buckets);
303
304    rval = KERN_SUCCESS;
305
306finish:
307    return rval;
308}
309
310/*******************************************************************************
311* Simple function to find the first empty cell
312*******************************************************************************/
313static kern_return_t
314get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index)
315{
316    kern_return_t rval = KERN_FAILURE;
317    DictEntry *entry = NULL;
318    u_int base, idx;
319
320    base = idx = dict->hash(dict, key);
321
322    /* Iterate through the buckets until we find an EMPTY bucket, a DELETED
323     * bucket, or a key match.
324     */
325    entry = kxld_array_get_item(&dict->buckets, idx);
326    while (entry->state == USED && !dict->cmp(entry->key, key)) {
327        idx = (idx + 1) % dict->buckets.nitems;
328        require_action(base != idx, finish, rval=KERN_FAILURE);
329        entry = kxld_array_get_item(&dict->buckets, idx);
330    }
331
332    *r_index = idx;
333    rval = KERN_SUCCESS;
334
335finish:
336    return rval;
337}
338
339/*******************************************************************************
340*******************************************************************************/
341void
342kxld_dict_remove(KXLDDict *dict, const void *key, void **value)
343{
344    kern_return_t rval = KERN_FAILURE;
345    DictEntry *entry = NULL;
346    u_int idx = 0;
347
348    check(dict);
349    check(key);
350
351    /* Find the item */
352    rval = get_locate_index(dict, key, &idx);
353    if (rval) {
354        if (value) *value = NULL;
355        return;
356    }
357
358    entry = kxld_array_get_item(&dict->buckets, idx);
359
360    /* Save the value if requested */
361    if (value) *value = entry->value;
362
363    /* Delete the item from the dictionary */
364    entry->key = NULL;
365    entry->value = NULL;
366    entry->state = DELETED;
367    dict->num_entries--;
368}
369
370/*******************************************************************************
371*******************************************************************************/
372void
373kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key,
374    void **value)
375{
376    DictEntry *entry = NULL;
377
378    check(iter);
379    check(key);
380    check(value);
381
382    *key = NULL;
383    *value = NULL;
384
385    /* Walk over the dictionary looking for USED buckets */
386    for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) {
387        entry = kxld_array_get_item(&iter->dict->buckets, iter->idx);
388        if (entry->state == USED) {
389            *key = entry->key;
390            *value = entry->value;
391            ++(iter->idx);
392            break;
393        }
394    }
395}
396
397/*******************************************************************************
398*******************************************************************************/
399void
400kxld_dict_iterator_reset(KXLDDictIterator *iter)
401{
402    iter->idx = 0;
403}
404
405/*******************************************************************************
406* This is Daniel Bernstein's hash algorithm from comp.lang.c
407* It's fast and distributes well.  Returns an idx into the symbol hash table.
408* NOTE: Will not check for a valid pointer - performance
409*******************************************************************************/
410u_int
411kxld_dict_string_hash(const KXLDDict *dict, const void *_key)
412{
413    const char *key = _key;
414    u_int c = 0;
415    u_int hash_val = 5381;
416
417    check(dict);
418    check(_key);
419
420    while ((c = *key++)) {
421        /* hash(i) = hash(i-1) *33 ^ name[i] */
422        hash_val = ((hash_val << 5) + hash_val) ^ c;
423    }
424
425    return (hash_val % dict->buckets.nitems);
426}
427
428u_int
429kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key)
430{
431    uint32_t key = *(const uint32_t *) _key;
432
433    check(_key);
434
435    return (u_int) (key % dict->buckets.nitems);
436}
437
438u_int
439kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key)
440{
441    kxld_addr_t key = *(const kxld_addr_t *) _key;
442
443    check(_key);
444
445    return (u_int) (key % dict->buckets.nitems);
446}
447
448u_int
449kxld_dict_string_cmp(const void *key1, const void *key2)
450{
451    return streq(key1, key2);
452}
453
454u_int
455kxld_dict_uint32_cmp(const void *key1, const void *key2)
456{
457    const uint32_t *a = key1;
458    const uint32_t *b = key2;
459
460    return (a && b && (*a == *b));
461}
462
463u_int
464kxld_dict_kxldaddr_cmp(const void *key1, const void *key2)
465{
466    const kxld_addr_t *a = key1;
467    const kxld_addr_t *b = key2;
468
469    return (a && b && (*a == *b));
470}
471
472