1/*
2 * hash.c :  dumping and reading hash tables to/from files.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <stdlib.h>
27#include <limits.h>
28
29#include <apr_version.h>
30#include <apr_pools.h>
31#include <apr_hash.h>
32#include <apr_file_io.h>
33
34#ifndef SVN_HASH__GETS_SETS
35#define SVN_HASH__GETS_SETS
36#endif
37#include "svn_hash.h"
38
39#include "svn_types.h"
40#include "svn_string.h"
41#include "svn_error.h"
42#include "svn_sorts.h"
43#include "svn_io.h"
44#include "svn_pools.h"
45
46#include "private/svn_dep_compat.h"
47#include "private/svn_sorts_private.h"
48#include "private/svn_subr_private.h"
49
50#include "svn_private_config.h"
51
52
53
54/*
55 * The format of a dumped hash table is:
56 *
57 *   K <nlength>
58 *   name (a string of <nlength> bytes, followed by a newline)
59 *   V <vlength>
60 *   val (a string of <vlength> bytes, followed by a newline)
61 *   [... etc, etc ...]
62 *   END
63 *
64 *
65 * (Yes, there is a newline after END.)
66 *
67 * For example:
68 *
69 *   K 5
70 *   color
71 *   V 3
72 *   red
73 *   K 11
74 *   wine review
75 *   V 376
76 *   A forthright entrance, yet coquettish on the tongue, its deceptively
77 *   fruity exterior hides the warm mahagony undercurrent that is the
78 *   hallmark of Chateau Fraisant-Pitre.  Connoisseurs of the region will
79 *   be pleased to note the familiar, subtle hints of mulberries and
80 *   carburator fluid.  Its confident finish is marred only by a barely
81 *   detectable suggestion of rancid squid ink.
82 *   K 5
83 *   price
84 *   V 8
85 *   US $6.50
86 *   END
87 *
88 */
89
90
91
92
93/*** Dumping and loading hash files. */
94
95/* Implements svn_hash_read2 and svn_hash_read_incremental. */
96svn_error_t *
97svn_hash__read_entry(svn_hash__entry_t *entry,
98                     svn_stream_t *stream,
99                     const char *terminator,
100                     svn_boolean_t incremental,
101                     apr_pool_t *pool)
102{
103  svn_stringbuf_t *buf;
104  svn_boolean_t eof;
105  apr_size_t len;
106  char c;
107
108  svn_error_t *err;
109  apr_uint64_t ui64;
110
111  /* Read a key length line.  Might be END, though. */
112  SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
113
114  /* Check for the end of the hash. */
115  if ((!terminator && eof && buf->len == 0)
116      || (terminator && (strcmp(buf->data, terminator) == 0)))
117  {
118    entry->key = NULL;
119    entry->keylen = 0;
120    entry->val = NULL;
121    entry->vallen = 0;
122
123    return SVN_NO_ERROR;
124  }
125
126  /* Check for unexpected end of stream */
127  if (eof)
128    return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
129                            _("Serialized hash missing terminator"));
130
131  if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
132    {
133      /* Get the length of the key */
134      err = svn_cstring_strtoui64(&ui64, buf->data + 2,
135                                  0, APR_SIZE_MAX, 10);
136      if (err)
137        return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
138                                _("Serialized hash malformed key length"));
139      entry->keylen = (apr_size_t)ui64;
140
141      /* Now read that much into a buffer. */
142      entry->key = apr_palloc(pool, entry->keylen + 1);
143      SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
144      entry->key[entry->keylen] = '\0';
145
146      /* Suck up extra newline after key data */
147      len = 1;
148      SVN_ERR(svn_stream_read_full(stream, &c, &len));
149      if (c != '\n')
150        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
151                                _("Serialized hash malformed key data"));
152
153      /* Read a val length line */
154      SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
155
156      if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
157        {
158          /* Get the length of the val */
159          err = svn_cstring_strtoui64(&ui64, buf->data + 2,
160                                      0, APR_SIZE_MAX, 10);
161          if (err)
162            return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
163                                    _("Serialized hash malformed value length"));
164          entry->vallen = (apr_size_t)ui64;
165
166          entry->val = apr_palloc(pool, entry->vallen + 1);
167          SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen));
168          entry->val[entry->vallen] = '\0';
169
170          /* Suck up extra newline after val data */
171          len = 1;
172          SVN_ERR(svn_stream_read_full(stream, &c, &len));
173          if (c != '\n')
174            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
175                                    _("Serialized hash malformed value data"));
176        }
177      else
178        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
179                                _("Serialized hash malformed"));
180    }
181  else if (incremental && (buf->len >= 3)
182           && (buf->data[0] == 'D') && (buf->data[1] == ' '))
183    {
184      /* Get the length of the key */
185      err = svn_cstring_strtoui64(&ui64, buf->data + 2,
186                                  0, APR_SIZE_MAX, 10);
187      if (err)
188        return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
189                                _("Serialized hash malformed key length"));
190      entry->keylen = (apr_size_t)ui64;
191
192      /* Now read that much into a buffer. */
193      entry->key = apr_palloc(pool, entry->keylen + 1);
194      SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
195      entry->key[entry->keylen] = '\0';
196
197      /* Suck up extra newline after key data */
198      len = 1;
199      SVN_ERR(svn_stream_read_full(stream, &c, &len));
200      if (c != '\n')
201        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
202                                _("Serialized hash malformed key data"));
203
204      /* Remove this hash entry. */
205      entry->vallen = 0;
206      entry->val = NULL;
207    }
208  else
209    {
210      return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
211                              _("Serialized hash malformed"));
212    }
213
214  return SVN_NO_ERROR;
215}
216
217static svn_error_t *
218hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
219          svn_boolean_t incremental, apr_pool_t *pool)
220{
221  apr_pool_t *iterpool = svn_pool_create(pool);
222
223  while (1)
224    {
225      svn_hash__entry_t entry;
226
227      svn_pool_clear(iterpool);
228      SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
229                                   incremental, iterpool));
230
231      /* end of hash? */
232      if (entry.key == NULL)
233        break;
234
235      if (entry.val)
236        {
237          /* Add a new hash entry. */
238          apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen),
239                       entry.keylen,
240                       svn_string_ncreate(entry.val, entry.vallen, pool));
241        }
242      else
243        {
244          /* Remove this hash entry. */
245          apr_hash_set(hash, entry.key, entry.keylen, NULL);
246        }
247    }
248
249  svn_pool_destroy(iterpool);
250  return SVN_NO_ERROR;
251}
252
253
254/* Implements svn_hash_write2 and svn_hash_write_incremental. */
255static svn_error_t *
256hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
257           const char *terminator, apr_pool_t *pool)
258{
259  apr_pool_t *subpool;
260  apr_size_t len;
261  apr_array_header_t *list;
262  int i;
263
264  subpool = svn_pool_create(pool);
265
266  list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
267  for (i = 0; i < list->nelts; i++)
268    {
269      svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
270      svn_string_t *valstr = item->value;
271
272      svn_pool_clear(subpool);
273
274      /* Don't output entries equal to the ones in oldhash, if present. */
275      if (oldhash)
276        {
277          svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
278
279          if (oldstr && svn_string_compare(valstr, oldstr))
280            continue;
281        }
282
283      if (item->klen < 0)
284        return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
285                                _("Cannot serialize negative length"));
286
287      /* Write it out. */
288      SVN_ERR(svn_stream_printf(stream, subpool,
289                                "K %" APR_SIZE_T_FMT "\n%s\n"
290                                "V %" APR_SIZE_T_FMT "\n",
291                                (apr_size_t) item->klen,
292                                (const char *) item->key,
293                                valstr->len));
294      len = valstr->len;
295      SVN_ERR(svn_stream_write(stream, valstr->data, &len));
296      SVN_ERR(svn_stream_puts(stream, "\n"));
297    }
298
299  if (oldhash)
300    {
301      /* Output a deletion entry for each property in oldhash but not hash. */
302      list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
303                            pool);
304      for (i = 0; i < list->nelts; i++)
305        {
306          svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
307
308          svn_pool_clear(subpool);
309
310          /* If it's not present in the new hash, write out a D entry. */
311          if (! apr_hash_get(hash, item->key, item->klen))
312            SVN_ERR(svn_stream_printf(stream, subpool,
313                                      "D %" APR_SSIZE_T_FMT "\n%s\n",
314                                      item->klen, (const char *) item->key));
315        }
316    }
317
318  if (terminator)
319    SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
320
321  svn_pool_destroy(subpool);
322  return SVN_NO_ERROR;
323}
324
325
326svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
327                            const char *terminator, apr_pool_t *pool)
328{
329  return hash_read(hash, stream, terminator, FALSE, pool);
330}
331
332
333svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
334                                       svn_stream_t *stream,
335                                       const char *terminator,
336                                       apr_pool_t *pool)
337{
338  return hash_read(hash, stream, terminator, TRUE, pool);
339}
340
341
342svn_error_t *
343svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
344                const char *terminator, apr_pool_t *pool)
345{
346  return hash_write(hash, NULL, stream, terminator, pool);
347}
348
349
350svn_error_t *
351svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
352                           svn_stream_t *stream, const char *terminator,
353                           apr_pool_t *pool)
354{
355  SVN_ERR_ASSERT(oldhash != NULL);
356  return hash_write(hash, oldhash, stream, terminator, pool);
357}
358
359
360svn_error_t *
361svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
362{
363  return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
364                    SVN_HASH_TERMINATOR, pool);
365}
366
367
368/* There are enough quirks in the deprecated svn_hash_read that we
369   should just preserve its implementation. */
370svn_error_t *
371svn_hash_read(apr_hash_t *hash,
372              apr_file_t *srcfile,
373              apr_pool_t *pool)
374{
375  svn_error_t *err;
376  char buf[SVN_KEYLINE_MAXLEN];
377  apr_size_t num_read;
378  char c;
379  int first_time = 1;
380
381
382  while (1)
383    {
384      /* Read a key length line.  Might be END, though. */
385      apr_size_t len = sizeof(buf);
386
387      err = svn_io_read_length_line(srcfile, buf, &len, pool);
388      if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
389        {
390          /* We got an EOF on our very first attempt to read, which
391             means it's a zero-byte file.  No problem, just go home. */
392          svn_error_clear(err);
393          return SVN_NO_ERROR;
394        }
395      else if (err)
396        /* Any other circumstance is a genuine error. */
397        return err;
398
399      first_time = 0;
400
401      if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
402          || ((len == 9)
403              && (buf[0] == 'P')
404              && (buf[1] == 'R')       /* We formerly used just "END" to */
405              && (buf[2] == 'O')       /* end a property hash, but later */
406              && (buf[3] == 'P')       /* we added "PROPS-END", so that  */
407              && (buf[4] == 'S')       /* the fs dump format would be    */
408              && (buf[5] == '-')       /* more human-readable.  That's   */
409              && (buf[6] == 'E')       /* why we accept either way here. */
410              && (buf[7] == 'N')
411              && (buf[8] == 'D')))
412        {
413          /* We've reached the end of the dumped hash table, so leave. */
414          return SVN_NO_ERROR;
415        }
416      else if ((buf[0] == 'K') && (buf[1] == ' '))
417        {
418          size_t keylen;
419          int parsed_len;
420          void *keybuf;
421
422          /* Get the length of the key */
423          SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
424          keylen = parsed_len;
425
426          /* Now read that much into a buffer, + 1 byte for null terminator */
427          keybuf = apr_palloc(pool, keylen + 1);
428          SVN_ERR(svn_io_file_read_full2(srcfile,
429                                         keybuf, keylen,
430                                         &num_read, NULL, pool));
431          ((char *) keybuf)[keylen] = '\0';
432
433          /* Suck up extra newline after key data */
434          SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
435          if (c != '\n')
436            return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
437
438          /* Read a val length line */
439          len = sizeof(buf);
440          SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
441
442          if ((buf[0] == 'V') && (buf[1] == ' '))
443            {
444              svn_string_t *value = apr_palloc(pool, sizeof(*value));
445              apr_size_t vallen;
446              void *valbuf;
447
448              /* Get the length of the value */
449              SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
450              vallen = parsed_len;
451
452              /* Again, 1 extra byte for the null termination. */
453              valbuf = apr_palloc(pool, vallen + 1);
454              SVN_ERR(svn_io_file_read_full2(srcfile,
455                                             valbuf, vallen,
456                                             &num_read, NULL, pool));
457              ((char *) valbuf)[vallen] = '\0';
458
459              /* Suck up extra newline after val data */
460              SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
461              if (c != '\n')
462                return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
463
464              value->data = valbuf;
465              value->len = vallen;
466
467              /* The Grand Moment:  add a new hash entry! */
468              apr_hash_set(hash, keybuf, keylen, value);
469            }
470          else
471            {
472              return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
473            }
474        }
475      else
476        {
477          return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
478        }
479    } /* while (1) */
480}
481
482
483
484/*** Diffing hashes ***/
485
486svn_error_t *
487svn_hash_diff(apr_hash_t *hash_a,
488              apr_hash_t *hash_b,
489              svn_hash_diff_func_t diff_func,
490              void *diff_func_baton,
491              apr_pool_t *pool)
492{
493  apr_hash_index_t *hi;
494
495  if (hash_a)
496    for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
497      {
498        const void *key;
499        apr_ssize_t klen;
500
501        apr_hash_this(hi, &key, &klen, NULL);
502
503        if (hash_b && (apr_hash_get(hash_b, key, klen)))
504          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
505                               diff_func_baton));
506        else
507          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
508                               diff_func_baton));
509      }
510
511  if (hash_b)
512    for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
513      {
514        const void *key;
515        apr_ssize_t klen;
516
517        apr_hash_this(hi, &key, &klen, NULL);
518
519        if (! (hash_a && apr_hash_get(hash_a, key, klen)))
520          SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
521                               diff_func_baton));
522      }
523
524  return SVN_NO_ERROR;
525}
526
527
528/*** Misc. hash APIs ***/
529
530svn_error_t *
531svn_hash_keys(apr_array_header_t **array,
532              apr_hash_t *hash,
533              apr_pool_t *pool)
534{
535  apr_hash_index_t *hi;
536
537  *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
538
539  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
540    {
541      APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi);
542    }
543
544  return SVN_NO_ERROR;
545}
546
547
548svn_error_t *
549svn_hash_from_cstring_keys(apr_hash_t **hash_p,
550                           const apr_array_header_t *keys,
551                           apr_pool_t *pool)
552{
553  int i;
554  apr_hash_t *hash = svn_hash__make(pool);
555  for (i = 0; i < keys->nelts; i++)
556    {
557      const char *key =
558        apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
559      svn_hash_sets(hash, key, key);
560    }
561  *hash_p = hash;
562  return SVN_NO_ERROR;
563}
564
565
566void *
567svn_hash__gets_debug(apr_hash_t *ht, const char *key)
568{
569  return apr_hash_get(ht, key, APR_HASH_KEY_STRING);
570}
571
572
573void
574svn_hash__sets_debug(apr_hash_t *ht, const char *key, const void *val)
575{
576  apr_hash_set(ht, key, APR_HASH_KEY_STRING, val);
577}
578
579
580
581/*** Specialized getter APIs ***/
582
583const char *
584svn_hash__get_cstring(apr_hash_t *hash,
585                      const char *key,
586                      const char *default_value)
587{
588  if (hash)
589    {
590      const char *value = svn_hash_gets(hash, key);
591      return value ? value : default_value;
592    }
593
594  return default_value;
595}
596
597
598svn_boolean_t
599svn_hash__get_bool(apr_hash_t *hash, const char *key,
600                   svn_boolean_t default_value)
601{
602  const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
603  svn_tristate_t value = svn_tristate__from_word(tmp_value);
604
605  if (value == svn_tristate_true)
606    return TRUE;
607  else if (value == svn_tristate_false)
608    return FALSE;
609
610  return default_value;
611}
612
613
614
615/*** Optimized hash function ***/
616
617/* apr_hashfunc_t optimized for the key that we use in SVN: paths and
618 * property names.  Its primary goal is speed for keys of known length.
619 *
620 * Since strings tend to spawn large value spaces (usually differ in many
621 * bits with differences spanning a larger section of the key), we can be
622 * quite sloppy extracting a hash value.  The more keys there are in a
623 * hash container, the more bits of the value returned by this function
624 * will be used.  For a small number of string keys, choosing bits from any
625 * any fix location close to the tail of those keys would usually be good
626 * enough to prevent high collision rates.
627 */
628static unsigned int
629hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
630{
631    unsigned int hash = 0;
632    const unsigned char *key = (const unsigned char *)char_key;
633    const unsigned char *p;
634    apr_ssize_t i;
635
636    if (*klen == APR_HASH_KEY_STRING)
637      *klen = strlen(char_key);
638
639#if SVN_UNALIGNED_ACCESS_IS_OK
640    for (p = key, i = *klen; i >= 4; i-=4, p+=4)
641      {
642        apr_uint32_t chunk = *(const apr_uint32_t *)p;
643
644        /* the ">> 17" part gives upper bits in the chunk a chance to make
645           some impact as well */
646        hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
647      }
648#else
649    for (p = key, i = *klen; i >= 4; i-=4, p+=4)
650      {
651        hash = hash * 33 * 33 * 33 * 33
652              + p[0] * 33 * 33 * 33
653              + p[1] * 33 * 33
654              + p[2] * 33
655              + p[3];
656      }
657#endif
658    for (; i; i--, p++)
659        hash = hash * 33 + *p;
660
661    return hash;
662}
663
664apr_hash_t *
665svn_hash__make(apr_pool_t *pool)
666{
667  return apr_hash_make_custom(pool, hashfunc_compatible);
668}
669