1/* 2 * hash.c : dumping and reading hash tables to/from files. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <stdlib.h> 27#include <limits.h> 28 29#include <apr_version.h> 30#include <apr_pools.h> 31#include <apr_hash.h> 32#include <apr_file_io.h> 33 34#ifndef SVN_HASH__GETS_SETS 35#define SVN_HASH__GETS_SETS 36#endif 37#include "svn_hash.h" 38 39#include "svn_types.h" 40#include "svn_string.h" 41#include "svn_error.h" 42#include "svn_sorts.h" 43#include "svn_io.h" 44#include "svn_pools.h" 45 46#include "private/svn_dep_compat.h" 47#include "private/svn_sorts_private.h" 48#include "private/svn_subr_private.h" 49 50#include "svn_private_config.h" 51 52 53 54/* 55 * The format of a dumped hash table is: 56 * 57 * K <nlength> 58 * name (a string of <nlength> bytes, followed by a newline) 59 * V <vlength> 60 * val (a string of <vlength> bytes, followed by a newline) 61 * [... etc, etc ...] 62 * END 63 * 64 * 65 * (Yes, there is a newline after END.) 66 * 67 * For example: 68 * 69 * K 5 70 * color 71 * V 3 72 * red 73 * K 11 74 * wine review 75 * V 376 76 * A forthright entrance, yet coquettish on the tongue, its deceptively 77 * fruity exterior hides the warm mahagony undercurrent that is the 78 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will 79 * be pleased to note the familiar, subtle hints of mulberries and 80 * carburator fluid. Its confident finish is marred only by a barely 81 * detectable suggestion of rancid squid ink. 82 * K 5 83 * price 84 * V 8 85 * US $6.50 86 * END 87 * 88 */ 89 90 91 92 93/*** Dumping and loading hash files. */ 94 95/* Implements svn_hash_read2 and svn_hash_read_incremental. */ 96svn_error_t * 97svn_hash__read_entry(svn_hash__entry_t *entry, 98 svn_stream_t *stream, 99 const char *terminator, 100 svn_boolean_t incremental, 101 apr_pool_t *pool) 102{ 103 svn_stringbuf_t *buf; 104 svn_boolean_t eof; 105 apr_size_t len; 106 char c; 107 108 svn_error_t *err; 109 apr_uint64_t ui64; 110 111 /* Read a key length line. Might be END, though. */ 112 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); 113 114 /* Check for the end of the hash. */ 115 if ((!terminator && eof && buf->len == 0) 116 || (terminator && (strcmp(buf->data, terminator) == 0))) 117 { 118 entry->key = NULL; 119 entry->keylen = 0; 120 entry->val = NULL; 121 entry->vallen = 0; 122 123 return SVN_NO_ERROR; 124 } 125 126 /* Check for unexpected end of stream */ 127 if (eof) 128 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 129 _("Serialized hash missing terminator")); 130 131 if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' ')) 132 { 133 /* Get the length of the key */ 134 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 135 0, APR_SIZE_MAX, 10); 136 if (err) 137 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 138 _("Serialized hash malformed key length")); 139 entry->keylen = (apr_size_t)ui64; 140 141 /* Now read that much into a buffer. */ 142 entry->key = apr_palloc(pool, entry->keylen + 1); 143 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); 144 entry->key[entry->keylen] = '\0'; 145 146 /* Suck up extra newline after key data */ 147 len = 1; 148 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 149 if (c != '\n') 150 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 151 _("Serialized hash malformed key data")); 152 153 /* Read a val length line */ 154 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); 155 156 if ((buf->data[0] == 'V') && (buf->data[1] == ' ')) 157 { 158 /* Get the length of the val */ 159 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 160 0, APR_SIZE_MAX, 10); 161 if (err) 162 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 163 _("Serialized hash malformed value length")); 164 entry->vallen = (apr_size_t)ui64; 165 166 entry->val = apr_palloc(pool, entry->vallen + 1); 167 SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen)); 168 entry->val[entry->vallen] = '\0'; 169 170 /* Suck up extra newline after val data */ 171 len = 1; 172 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 173 if (c != '\n') 174 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 175 _("Serialized hash malformed value data")); 176 } 177 else 178 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 179 _("Serialized hash malformed")); 180 } 181 else if (incremental && (buf->len >= 3) 182 && (buf->data[0] == 'D') && (buf->data[1] == ' ')) 183 { 184 /* Get the length of the key */ 185 err = svn_cstring_strtoui64(&ui64, buf->data + 2, 186 0, APR_SIZE_MAX, 10); 187 if (err) 188 return svn_error_create(SVN_ERR_MALFORMED_FILE, err, 189 _("Serialized hash malformed key length")); 190 entry->keylen = (apr_size_t)ui64; 191 192 /* Now read that much into a buffer. */ 193 entry->key = apr_palloc(pool, entry->keylen + 1); 194 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); 195 entry->key[entry->keylen] = '\0'; 196 197 /* Suck up extra newline after key data */ 198 len = 1; 199 SVN_ERR(svn_stream_read_full(stream, &c, &len)); 200 if (c != '\n') 201 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 202 _("Serialized hash malformed key data")); 203 204 /* Remove this hash entry. */ 205 entry->vallen = 0; 206 entry->val = NULL; 207 } 208 else 209 { 210 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 211 _("Serialized hash malformed")); 212 } 213 214 return SVN_NO_ERROR; 215} 216 217static svn_error_t * 218hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator, 219 svn_boolean_t incremental, apr_pool_t *pool) 220{ 221 apr_pool_t *iterpool = svn_pool_create(pool); 222 223 while (1) 224 { 225 svn_hash__entry_t entry; 226 227 svn_pool_clear(iterpool); 228 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator, 229 incremental, iterpool)); 230 231 /* end of hash? */ 232 if (entry.key == NULL) 233 break; 234 235 if (entry.val) 236 { 237 /* Add a new hash entry. */ 238 apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen), 239 entry.keylen, 240 svn_string_ncreate(entry.val, entry.vallen, pool)); 241 } 242 else 243 { 244 /* Remove this hash entry. */ 245 apr_hash_set(hash, entry.key, entry.keylen, NULL); 246 } 247 } 248 249 svn_pool_destroy(iterpool); 250 return SVN_NO_ERROR; 251} 252 253 254/* Implements svn_hash_write2 and svn_hash_write_incremental. */ 255static svn_error_t * 256hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream, 257 const char *terminator, apr_pool_t *pool) 258{ 259 apr_pool_t *subpool; 260 apr_size_t len; 261 apr_array_header_t *list; 262 int i; 263 264 subpool = svn_pool_create(pool); 265 266 list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool); 267 for (i = 0; i < list->nelts; i++) 268 { 269 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 270 svn_string_t *valstr = item->value; 271 272 svn_pool_clear(subpool); 273 274 /* Don't output entries equal to the ones in oldhash, if present. */ 275 if (oldhash) 276 { 277 svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen); 278 279 if (oldstr && svn_string_compare(valstr, oldstr)) 280 continue; 281 } 282 283 if (item->klen < 0) 284 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, 285 _("Cannot serialize negative length")); 286 287 /* Write it out. */ 288 SVN_ERR(svn_stream_printf(stream, subpool, 289 "K %" APR_SIZE_T_FMT "\n%s\n" 290 "V %" APR_SIZE_T_FMT "\n", 291 (apr_size_t) item->klen, 292 (const char *) item->key, 293 valstr->len)); 294 len = valstr->len; 295 SVN_ERR(svn_stream_write(stream, valstr->data, &len)); 296 SVN_ERR(svn_stream_puts(stream, "\n")); 297 } 298 299 if (oldhash) 300 { 301 /* Output a deletion entry for each property in oldhash but not hash. */ 302 list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically, 303 pool); 304 for (i = 0; i < list->nelts; i++) 305 { 306 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t); 307 308 svn_pool_clear(subpool); 309 310 /* If it's not present in the new hash, write out a D entry. */ 311 if (! apr_hash_get(hash, item->key, item->klen)) 312 SVN_ERR(svn_stream_printf(stream, subpool, 313 "D %" APR_SSIZE_T_FMT "\n%s\n", 314 item->klen, (const char *) item->key)); 315 } 316 } 317 318 if (terminator) 319 SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator)); 320 321 svn_pool_destroy(subpool); 322 return SVN_NO_ERROR; 323} 324 325 326svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream, 327 const char *terminator, apr_pool_t *pool) 328{ 329 return hash_read(hash, stream, terminator, FALSE, pool); 330} 331 332 333svn_error_t *svn_hash_read_incremental(apr_hash_t *hash, 334 svn_stream_t *stream, 335 const char *terminator, 336 apr_pool_t *pool) 337{ 338 return hash_read(hash, stream, terminator, TRUE, pool); 339} 340 341 342svn_error_t * 343svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream, 344 const char *terminator, apr_pool_t *pool) 345{ 346 return hash_write(hash, NULL, stream, terminator, pool); 347} 348 349 350svn_error_t * 351svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash, 352 svn_stream_t *stream, const char *terminator, 353 apr_pool_t *pool) 354{ 355 SVN_ERR_ASSERT(oldhash != NULL); 356 return hash_write(hash, oldhash, stream, terminator, pool); 357} 358 359 360svn_error_t * 361svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool) 362{ 363 return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool), 364 SVN_HASH_TERMINATOR, pool); 365} 366 367 368/* There are enough quirks in the deprecated svn_hash_read that we 369 should just preserve its implementation. */ 370svn_error_t * 371svn_hash_read(apr_hash_t *hash, 372 apr_file_t *srcfile, 373 apr_pool_t *pool) 374{ 375 svn_error_t *err; 376 char buf[SVN_KEYLINE_MAXLEN]; 377 apr_size_t num_read; 378 char c; 379 int first_time = 1; 380 381 382 while (1) 383 { 384 /* Read a key length line. Might be END, though. */ 385 apr_size_t len = sizeof(buf); 386 387 err = svn_io_read_length_line(srcfile, buf, &len, pool); 388 if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time) 389 { 390 /* We got an EOF on our very first attempt to read, which 391 means it's a zero-byte file. No problem, just go home. */ 392 svn_error_clear(err); 393 return SVN_NO_ERROR; 394 } 395 else if (err) 396 /* Any other circumstance is a genuine error. */ 397 return err; 398 399 first_time = 0; 400 401 if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D')) 402 || ((len == 9) 403 && (buf[0] == 'P') 404 && (buf[1] == 'R') /* We formerly used just "END" to */ 405 && (buf[2] == 'O') /* end a property hash, but later */ 406 && (buf[3] == 'P') /* we added "PROPS-END", so that */ 407 && (buf[4] == 'S') /* the fs dump format would be */ 408 && (buf[5] == '-') /* more human-readable. That's */ 409 && (buf[6] == 'E') /* why we accept either way here. */ 410 && (buf[7] == 'N') 411 && (buf[8] == 'D'))) 412 { 413 /* We've reached the end of the dumped hash table, so leave. */ 414 return SVN_NO_ERROR; 415 } 416 else if ((buf[0] == 'K') && (buf[1] == ' ')) 417 { 418 size_t keylen; 419 int parsed_len; 420 void *keybuf; 421 422 /* Get the length of the key */ 423 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 424 keylen = parsed_len; 425 426 /* Now read that much into a buffer, + 1 byte for null terminator */ 427 keybuf = apr_palloc(pool, keylen + 1); 428 SVN_ERR(svn_io_file_read_full2(srcfile, 429 keybuf, keylen, 430 &num_read, NULL, pool)); 431 ((char *) keybuf)[keylen] = '\0'; 432 433 /* Suck up extra newline after key data */ 434 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 435 if (c != '\n') 436 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 437 438 /* Read a val length line */ 439 len = sizeof(buf); 440 SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool)); 441 442 if ((buf[0] == 'V') && (buf[1] == ' ')) 443 { 444 svn_string_t *value = apr_palloc(pool, sizeof(*value)); 445 apr_size_t vallen; 446 void *valbuf; 447 448 /* Get the length of the value */ 449 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2)); 450 vallen = parsed_len; 451 452 /* Again, 1 extra byte for the null termination. */ 453 valbuf = apr_palloc(pool, vallen + 1); 454 SVN_ERR(svn_io_file_read_full2(srcfile, 455 valbuf, vallen, 456 &num_read, NULL, pool)); 457 ((char *) valbuf)[vallen] = '\0'; 458 459 /* Suck up extra newline after val data */ 460 SVN_ERR(svn_io_file_getc(&c, srcfile, pool)); 461 if (c != '\n') 462 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 463 464 value->data = valbuf; 465 value->len = vallen; 466 467 /* The Grand Moment: add a new hash entry! */ 468 apr_hash_set(hash, keybuf, keylen, value); 469 } 470 else 471 { 472 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 473 } 474 } 475 else 476 { 477 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL); 478 } 479 } /* while (1) */ 480} 481 482 483 484/*** Diffing hashes ***/ 485 486svn_error_t * 487svn_hash_diff(apr_hash_t *hash_a, 488 apr_hash_t *hash_b, 489 svn_hash_diff_func_t diff_func, 490 void *diff_func_baton, 491 apr_pool_t *pool) 492{ 493 apr_hash_index_t *hi; 494 495 if (hash_a) 496 for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi)) 497 { 498 const void *key; 499 apr_ssize_t klen; 500 501 apr_hash_this(hi, &key, &klen, NULL); 502 503 if (hash_b && (apr_hash_get(hash_b, key, klen))) 504 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both, 505 diff_func_baton)); 506 else 507 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a, 508 diff_func_baton)); 509 } 510 511 if (hash_b) 512 for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi)) 513 { 514 const void *key; 515 apr_ssize_t klen; 516 517 apr_hash_this(hi, &key, &klen, NULL); 518 519 if (! (hash_a && apr_hash_get(hash_a, key, klen))) 520 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b, 521 diff_func_baton)); 522 } 523 524 return SVN_NO_ERROR; 525} 526 527 528/*** Misc. hash APIs ***/ 529 530svn_error_t * 531svn_hash_keys(apr_array_header_t **array, 532 apr_hash_t *hash, 533 apr_pool_t *pool) 534{ 535 apr_hash_index_t *hi; 536 537 *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *)); 538 539 for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi)) 540 { 541 APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi); 542 } 543 544 return SVN_NO_ERROR; 545} 546 547 548svn_error_t * 549svn_hash_from_cstring_keys(apr_hash_t **hash_p, 550 const apr_array_header_t *keys, 551 apr_pool_t *pool) 552{ 553 int i; 554 apr_hash_t *hash = svn_hash__make(pool); 555 for (i = 0; i < keys->nelts; i++) 556 { 557 const char *key = 558 apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *)); 559 svn_hash_sets(hash, key, key); 560 } 561 *hash_p = hash; 562 return SVN_NO_ERROR; 563} 564 565 566void * 567svn_hash__gets_debug(apr_hash_t *ht, const char *key) 568{ 569 return apr_hash_get(ht, key, APR_HASH_KEY_STRING); 570} 571 572 573void 574svn_hash__sets_debug(apr_hash_t *ht, const char *key, const void *val) 575{ 576 apr_hash_set(ht, key, APR_HASH_KEY_STRING, val); 577} 578 579 580 581/*** Specialized getter APIs ***/ 582 583const char * 584svn_hash__get_cstring(apr_hash_t *hash, 585 const char *key, 586 const char *default_value) 587{ 588 if (hash) 589 { 590 const char *value = svn_hash_gets(hash, key); 591 return value ? value : default_value; 592 } 593 594 return default_value; 595} 596 597 598svn_boolean_t 599svn_hash__get_bool(apr_hash_t *hash, const char *key, 600 svn_boolean_t default_value) 601{ 602 const char *tmp_value = svn_hash__get_cstring(hash, key, NULL); 603 svn_tristate_t value = svn_tristate__from_word(tmp_value); 604 605 if (value == svn_tristate_true) 606 return TRUE; 607 else if (value == svn_tristate_false) 608 return FALSE; 609 610 return default_value; 611} 612 613 614 615/*** Optimized hash function ***/ 616 617/* apr_hashfunc_t optimized for the key that we use in SVN: paths and 618 * property names. Its primary goal is speed for keys of known length. 619 * 620 * Since strings tend to spawn large value spaces (usually differ in many 621 * bits with differences spanning a larger section of the key), we can be 622 * quite sloppy extracting a hash value. The more keys there are in a 623 * hash container, the more bits of the value returned by this function 624 * will be used. For a small number of string keys, choosing bits from any 625 * any fix location close to the tail of those keys would usually be good 626 * enough to prevent high collision rates. 627 */ 628static unsigned int 629hashfunc_compatible(const char *char_key, apr_ssize_t *klen) 630{ 631 unsigned int hash = 0; 632 const unsigned char *key = (const unsigned char *)char_key; 633 const unsigned char *p; 634 apr_ssize_t i; 635 636 if (*klen == APR_HASH_KEY_STRING) 637 *klen = strlen(char_key); 638 639#if SVN_UNALIGNED_ACCESS_IS_OK 640 for (p = key, i = *klen; i >= 4; i-=4, p+=4) 641 { 642 apr_uint32_t chunk = *(const apr_uint32_t *)p; 643 644 /* the ">> 17" part gives upper bits in the chunk a chance to make 645 some impact as well */ 646 hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17); 647 } 648#else 649 for (p = key, i = *klen; i >= 4; i-=4, p+=4) 650 { 651 hash = hash * 33 * 33 * 33 * 33 652 + p[0] * 33 * 33 * 33 653 + p[1] * 33 * 33 654 + p[2] * 33 655 + p[3]; 656 } 657#endif 658 for (; i; i--, p++) 659 hash = hash * 33 + *p; 660 661 return hash; 662} 663 664apr_hash_t * 665svn_hash__make(apr_pool_t *pool) 666{ 667 return apr_hash_make_custom(pool, hashfunc_compatible); 668} 669