/* string.c revision 299742 */
1/* 2 * string.c: routines to manipulate counted-length strings 3 * (svn_stringbuf_t and svn_string_t) and C strings. 4 * 5 * 6 * ==================================================================== 7 * Licensed to the Apache Software Foundation (ASF) under one 8 * or more contributor license agreements. See the NOTICE file 9 * distributed with this work for additional information 10 * regarding copyright ownership. The ASF licenses this file 11 * to you under the Apache License, Version 2.0 (the 12 * "License"); you may not use this file except in compliance 13 * with the License. You may obtain a copy of the License at 14 * 15 * http://www.apache.org/licenses/LICENSE-2.0 16 * 17 * Unless required by applicable law or agreed to in writing, 18 * software distributed under the License is distributed on an 19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 * KIND, either express or implied. See the License for the 21 * specific language governing permissions and limitations 22 * under the License. 23 * ==================================================================== 24 */ 25 26 27 28#include <apr.h> 29#include <assert.h> 30 31#include <string.h> /* for memcpy(), memcmp(), strlen() */ 32#include <apr_fnmatch.h> 33#include "svn_string.h" /* loads "svn_types.h" and <apr_pools.h> */ 34#include "svn_ctype.h" 35#include "private/svn_dep_compat.h" 36#include "private/svn_string_private.h" 37 38#include "svn_private_config.h" 39 40 41 42/* Allocate the space for a memory buffer from POOL. 43 * Return a pointer to the new buffer in *DATA and its size in *SIZE. 44 * The buffer size will be at least MINIMUM_SIZE. 45 * 46 * N.B.: The stringbuf creation functions use this, but since stringbufs 47 * always consume at least 1 byte for the NUL terminator, the 48 * resulting data pointers will never be NULL. 
49 */ 50static APR_INLINE void 51membuf_create(void **data, apr_size_t *size, 52 apr_size_t minimum_size, apr_pool_t *pool) 53{ 54 /* apr_palloc will allocate multiples of 8. 55 * Thus, we would waste some of that memory if we stuck to the 56 * smaller size. Note that this is safe even if apr_palloc would 57 * use some other alignment or none at all. */ 58 minimum_size = APR_ALIGN_DEFAULT(minimum_size); 59 *data = apr_palloc(pool, minimum_size); 60 *size = minimum_size; 61} 62 63/* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE 64 * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE, 65 * this function does nothing. 66 * 67 * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE 68 * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE 69 * will be multiplied by a power of two such that the result is 70 * greater or equal to MINIMUM_SIZE. The pointer to the new buffer 71 * will be returned in *DATA, and its size in *SIZE. 72 */ 73static APR_INLINE void 74membuf_ensure(void **data, apr_size_t *size, 75 apr_size_t minimum_size, apr_pool_t *pool) 76{ 77 if (minimum_size > *size) 78 { 79 apr_size_t new_size = *size; 80 81 if (new_size == 0) 82 new_size = minimum_size; 83 else 84 while (new_size < minimum_size) 85 { 86 const apr_size_t prev_size = new_size; 87 new_size *= 2; 88 89 /* check for apr_size_t overflow */ 90 if (prev_size > new_size) 91 { 92 new_size = minimum_size; 93 break; 94 } 95 } 96 97 membuf_create(data, size, new_size, pool); 98 } 99} 100 101void 102svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool) 103{ 104 membuf_create(&membuf->data, &membuf->size, size, pool); 105 membuf->pool = pool; 106} 107 108void 109svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size) 110{ 111 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); 112} 113 114void 115svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size) 116{ 117 const void *const old_data = membuf->data; 118 
const apr_size_t old_size = membuf->size; 119 120 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); 121 122 /* If we re-allocated MEMBUF->DATA, it cannot be NULL. 123 * Statically initialized membuffers (OLD_DATA) may be NULL, though. */ 124 if (old_data && old_data != membuf->data) 125 memcpy(membuf->data, old_data, old_size); 126} 127 128/* Always provide an out-of-line implementation of svn_membuf__zero */ 129#undef svn_membuf__zero 130void 131svn_membuf__zero(svn_membuf_t *membuf) 132{ 133 SVN_MEMBUF__ZERO(membuf); 134} 135 136/* Always provide an out-of-line implementation of svn_membuf__nzero */ 137#undef svn_membuf__nzero 138void 139svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size) 140{ 141 SVN_MEMBUF__NZERO(membuf, size); 142} 143 144static APR_INLINE svn_boolean_t 145string_compare(const char *str1, 146 const char *str2, 147 apr_size_t len1, 148 apr_size_t len2) 149{ 150 /* easy way out :) */ 151 if (len1 != len2) 152 return FALSE; 153 154 /* now the strings must have identical lengths */ 155 156 if ((memcmp(str1, str2, len1)) == 0) 157 return TRUE; 158 else 159 return FALSE; 160} 161 162static APR_INLINE apr_size_t 163string_first_non_whitespace(const char *str, apr_size_t len) 164{ 165 apr_size_t i; 166 167 for (i = 0; i < len; i++) 168 { 169 if (! svn_ctype_isspace(str[i])) 170 return i; 171 } 172 173 /* if we get here, then the string must be entirely whitespace */ 174 return len; 175} 176 177static APR_INLINE apr_size_t 178find_char_backward(const char *str, apr_size_t len, char ch) 179{ 180 apr_size_t i = len; 181 182 while (i != 0) 183 { 184 if (str[--i] == ch) 185 return i; 186 } 187 188 /* char was not found, return len */ 189 return len; 190} 191 192 193/* svn_string functions */ 194 195/* Return a new svn_string_t object, allocated in POOL, initialized with 196 * DATA and SIZE. Do not copy the contents of DATA, just store the pointer. 197 * SIZE is the length in bytes of DATA, excluding the required NUL 198 * terminator. 
*/ 199static svn_string_t * 200create_string(const char *data, apr_size_t size, 201 apr_pool_t *pool) 202{ 203 svn_string_t *new_string; 204 205 new_string = apr_palloc(pool, sizeof(*new_string)); 206 207 new_string->data = data; 208 new_string->len = size; 209 210 return new_string; 211} 212 213/* A data buffer for a zero-length string (just a null terminator). Many 214 * svn_string_t instances may share this same buffer. */ 215static const char empty_buffer[1] = {0}; 216 217svn_string_t * 218svn_string_create_empty(apr_pool_t *pool) 219{ 220 svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string)); 221 new_string->data = empty_buffer; 222 new_string->len = 0; 223 224 return new_string; 225} 226 227 228svn_string_t * 229svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) 230{ 231 void *mem; 232 char *data; 233 svn_string_t *new_string; 234 235 /* Allocate memory for svn_string_t and data in one chunk. */ 236 mem = apr_palloc(pool, sizeof(*new_string) + size + 1); 237 data = (char*)mem + sizeof(*new_string); 238 239 new_string = mem; 240 new_string->data = data; 241 new_string->len = size; 242 243 /* If SIZE is 0, NULL is valid for BYTES. */ 244 if (size) 245 memcpy(data, bytes, size); 246 247 /* Null termination is the convention -- even if we suspect the data 248 to be binary, it's not up to us to decide, it's the caller's 249 call. Heck, that's why they call it the caller! 
*/ 250 data[size] = '\0'; 251 252 return new_string; 253} 254 255 256svn_string_t * 257svn_string_create(const char *cstring, apr_pool_t *pool) 258{ 259 return svn_string_ncreate(cstring, strlen(cstring), pool); 260} 261 262 263svn_string_t * 264svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool) 265{ 266 return svn_string_ncreate(strbuf->data, strbuf->len, pool); 267} 268 269 270svn_string_t * 271svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap) 272{ 273 char *data = apr_pvsprintf(pool, fmt, ap); 274 275 /* wrap an svn_string_t around the new data */ 276 return create_string(data, strlen(data), pool); 277} 278 279 280svn_string_t * 281svn_string_createf(apr_pool_t *pool, const char *fmt, ...) 282{ 283 svn_string_t *str; 284 285 va_list ap; 286 va_start(ap, fmt); 287 str = svn_string_createv(pool, fmt, ap); 288 va_end(ap); 289 290 return str; 291} 292 293 294svn_boolean_t 295svn_string_isempty(const svn_string_t *str) 296{ 297 return (str->len == 0); 298} 299 300 301svn_string_t * 302svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool) 303{ 304 return (original_string ? svn_string_ncreate(original_string->data, 305 original_string->len, pool) 306 : NULL); 307} 308 309 310 311svn_boolean_t 312svn_string_compare(const svn_string_t *str1, const svn_string_t *str2) 313{ 314 return 315 string_compare(str1->data, str2->data, str1->len, str2->len); 316} 317 318 319 320apr_size_t 321svn_string_first_non_whitespace(const svn_string_t *str) 322{ 323 return 324 string_first_non_whitespace(str->data, str->len); 325} 326 327 328apr_size_t 329svn_string_find_char_backward(const svn_string_t *str, char ch) 330{ 331 return find_char_backward(str->data, str->len, ch); 332} 333 334svn_string_t * 335svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf) 336{ 337 /* In debug mode, detect attempts to modify the original STRBUF object. 
338 */ 339#ifdef SVN_DEBUG 340 strbuf->pool = NULL; 341 strbuf->blocksize = strbuf->len + 1; 342#endif 343 344 /* Both, svn_string_t and svn_stringbuf_t are public API structures 345 * since the svn epoch. Thus, we can rely on their precise layout not 346 * to change. 347 * 348 * It just so happens that svn_string_t is structurally equivalent 349 * to the (data, len) sub-set of svn_stringbuf_t. There is also no 350 * difference in alignment and padding. So, we can just re-interpret 351 * that part of STRBUF as a svn_string_t. 352 * 353 * However, since svn_string_t does not know about the blocksize 354 * member in svn_stringbuf_t, any attempt to re-size the returned 355 * svn_string_t might invalidate the STRBUF struct. Hence, we consider 356 * the source STRBUF "consumed". 357 * 358 * Modifying the string character content is fine, though. 359 */ 360 return (svn_string_t *)&strbuf->data; 361} 362 363 364 365/* svn_stringbuf functions */ 366 367svn_stringbuf_t * 368svn_stringbuf_create_empty(apr_pool_t *pool) 369{ 370 return svn_stringbuf_create_ensure(0, pool); 371} 372 373svn_stringbuf_t * 374svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool) 375{ 376 void *mem; 377 svn_stringbuf_t *new_string; 378 379 ++blocksize; /* + space for '\0' */ 380 381 /* Allocate memory for svn_string_t and data in one chunk. */ 382 membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool); 383 384 /* Initialize header and string */ 385 new_string = mem; 386 new_string->data = (char*)mem + sizeof(*new_string); 387 new_string->data[0] = '\0'; 388 new_string->len = 0; 389 new_string->blocksize = blocksize - sizeof(*new_string); 390 new_string->pool = pool; 391 392 return new_string; 393} 394 395svn_stringbuf_t * 396svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) 397{ 398 svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool); 399 400 /* If SIZE is 0, NULL is valid for BYTES. 
*/ 401 if (size) 402 memcpy(strbuf->data, bytes, size); 403 404 /* Null termination is the convention -- even if we suspect the data 405 to be binary, it's not up to us to decide, it's the caller's 406 call. Heck, that's why they call it the caller! */ 407 strbuf->data[size] = '\0'; 408 strbuf->len = size; 409 410 return strbuf; 411} 412 413 414svn_stringbuf_t * 415svn_stringbuf_create(const char *cstring, apr_pool_t *pool) 416{ 417 return svn_stringbuf_ncreate(cstring, strlen(cstring), pool); 418} 419 420 421svn_stringbuf_t * 422svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool) 423{ 424 return svn_stringbuf_ncreate(str->data, str->len, pool); 425} 426 427svn_stringbuf_t * 428svn_stringbuf_create_wrap(char *str, apr_pool_t *pool) 429{ 430 svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result)); 431 result->pool = pool; 432 result->data = str; 433 result->len = strlen(str); 434 result->blocksize = result->len + 1; 435 436 return result; 437} 438 439svn_stringbuf_t * 440svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap) 441{ 442 char *data = apr_pvsprintf(pool, fmt, ap); 443 apr_size_t size = strlen(data); 444 svn_stringbuf_t *new_string; 445 446 new_string = apr_palloc(pool, sizeof(*new_string)); 447 new_string->data = data; 448 new_string->len = size; 449 new_string->blocksize = size + 1; 450 new_string->pool = pool; 451 452 return new_string; 453} 454 455 456svn_stringbuf_t * 457svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...) 
458{ 459 svn_stringbuf_t *str; 460 461 va_list ap; 462 va_start(ap, fmt); 463 str = svn_stringbuf_createv(pool, fmt, ap); 464 va_end(ap); 465 466 return str; 467} 468 469 470void 471svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c) 472{ 473 memset(str->data, c, str->len); 474} 475 476 477void 478svn_stringbuf_set(svn_stringbuf_t *str, const char *value) 479{ 480 apr_size_t amt = strlen(value); 481 482 svn_stringbuf_ensure(str, amt); 483 memcpy(str->data, value, amt + 1); 484 str->len = amt; 485} 486 487void 488svn_stringbuf_setempty(svn_stringbuf_t *str) 489{ 490 if (str->len > 0) 491 str->data[0] = '\0'; 492 493 str->len = 0; 494} 495 496 497void 498svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes) 499{ 500 if (nbytes > str->len) 501 str->len = 0; 502 else 503 str->len -= nbytes; 504 505 str->data[str->len] = '\0'; 506} 507 508 509svn_boolean_t 510svn_stringbuf_isempty(const svn_stringbuf_t *str) 511{ 512 return (str->len == 0); 513} 514 515 516void 517svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size) 518{ 519 void *mem = NULL; 520 ++minimum_size; /* + space for '\0' */ 521 522 membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool); 523 if (mem && mem != str->data) 524 { 525 if (str->data) 526 memcpy(mem, str->data, str->len + 1); 527 str->data = mem; 528 } 529} 530 531 532/* WARNING - Optimized code ahead! 533 * This function has been hand-tuned for performance. Please read 534 * the comments below before modifying the code. 535 */ 536void 537svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte) 538{ 539 char *dest; 540 apr_size_t old_len = str->len; 541 542 /* In most cases, there will be pre-allocated memory left 543 * to just write the new byte at the end of the used section 544 * and terminate the string properly. 
545 */ 546 if (str->blocksize > old_len + 1) 547 { 548 /* The following read does not depend this write, so we 549 * can issue the write first to minimize register pressure: 550 * The value of old_len+1 is no longer needed; on most processors, 551 * dest[old_len+1] will be calculated implicitly as part of 552 * the addressing scheme. 553 */ 554 str->len = old_len+1; 555 556 /* Since the compiler cannot be sure that *src->data and *src 557 * don't overlap, we read src->data *once* before writing 558 * to *src->data. Replacing dest with str->data would force 559 * the compiler to read it again after the first byte. 560 */ 561 dest = str->data; 562 563 /* If not already available in a register as per ABI, load 564 * "byte" into the register (e.g. the one freed from old_len+1), 565 * then write it to the string buffer and terminate it properly. 566 * 567 * Including the "byte" fetch, all operations so far could be 568 * issued at once and be scheduled at the CPU's descression. 569 * Most likely, no-one will soon depend on the data that will be 570 * written in this function. So, no stalls there, either. 571 */ 572 dest[old_len] = byte; 573 dest[old_len+1] = '\0'; 574 } 575 else 576 { 577 /* we need to re-allocate the string buffer 578 * -> let the more generic implementation take care of that part 579 */ 580 581 /* Depending on the ABI, "byte" is a register value. If we were 582 * to take its address directly, the compiler might decide to 583 * put in on the stack *unconditionally*, even if that would 584 * only be necessary for this block. 585 */ 586 char b = byte; 587 svn_stringbuf_appendbytes(str, &b, 1); 588 } 589} 590 591 592void 593svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes, 594 apr_size_t count) 595{ 596 apr_size_t total_len; 597 void *start_address; 598 599 if (!count) 600 /* Allow BYTES to be NULL by avoiding passing it to memcpy. 
*/ 601 return; 602 603 total_len = str->len + count; /* total size needed */ 604 605 /* svn_stringbuf_ensure adds 1 for null terminator. */ 606 svn_stringbuf_ensure(str, total_len); 607 608 /* get address 1 byte beyond end of original bytestring */ 609 start_address = (str->data + str->len); 610 611 memcpy(start_address, bytes, count); 612 str->len = total_len; 613 614 str->data[str->len] = '\0'; /* We don't know if this is binary 615 data or not, but convention is 616 to null-terminate. */ 617} 618 619void 620svn_stringbuf_appendfill(svn_stringbuf_t *str, 621 char byte, 622 apr_size_t count) 623{ 624 apr_size_t new_len = str->len + count; 625 svn_stringbuf_ensure(str, new_len); 626 627 memset(str->data + str->len, byte, count); 628 629 /* update buffer length and always NUL-terminate it */ 630 str->len = new_len; 631 str->data[new_len] = '\0'; 632} 633 634 635void 636svn_stringbuf_appendstr(svn_stringbuf_t *targetstr, 637 const svn_stringbuf_t *appendstr) 638{ 639 svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len); 640} 641 642 643void 644svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr) 645{ 646 svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr)); 647} 648 649void 650svn_stringbuf_insert(svn_stringbuf_t *str, 651 apr_size_t pos, 652 const char *bytes, 653 apr_size_t count) 654{ 655 /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. 
*/ 656 if (count == 0) 657 return; 658 659 /* special case: BYTES overlaps with this string -> copy the source */ 660 if (bytes + count > str->data && bytes < str->data + str->blocksize) 661 bytes = apr_pmemdup(str->pool, bytes, count); 662 663 if (pos > str->len) 664 pos = str->len; 665 666 svn_stringbuf_ensure(str, str->len + count); 667 memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); 668 memcpy(str->data + pos, bytes, count); 669 670 str->len += count; 671} 672 673void 674svn_stringbuf_remove(svn_stringbuf_t *str, 675 apr_size_t pos, 676 apr_size_t count) 677{ 678 if (pos > str->len) 679 pos = str->len; 680 if (count > str->len - pos) 681 count = str->len - pos; 682 683 memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1); 684 str->len -= count; 685} 686 687void 688svn_stringbuf_replace(svn_stringbuf_t *str, 689 apr_size_t pos, 690 apr_size_t old_count, 691 const char *bytes, 692 apr_size_t new_count) 693{ 694 /* For COUNT==0, we allow BYTES to be NULL. 695 * In that case, this is just a substring removal. 
*/ 696 if (new_count == 0) 697 { 698 svn_stringbuf_remove(str, pos, old_count); 699 return; 700 } 701 702 /* special case: BYTES overlaps with this string -> copy the source */ 703 if (bytes + new_count > str->data && bytes < str->data + str->blocksize) 704 bytes = apr_pmemdup(str->pool, bytes, new_count); 705 706 if (pos > str->len) 707 pos = str->len; 708 if (old_count > str->len - pos) 709 old_count = str->len - pos; 710 711 if (old_count < new_count) 712 { 713 apr_size_t delta = new_count - old_count; 714 svn_stringbuf_ensure(str, str->len + delta); 715 } 716 717 if (old_count != new_count) 718 memmove(str->data + pos + new_count, str->data + pos + old_count, 719 str->len - pos - old_count + 1); 720 721 memcpy(str->data + pos, bytes, new_count); 722 str->len += new_count - old_count; 723} 724 725 726svn_stringbuf_t * 727svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool) 728{ 729 return (svn_stringbuf_ncreate(original_string->data, 730 original_string->len, pool)); 731} 732 733 734 735svn_boolean_t 736svn_stringbuf_compare(const svn_stringbuf_t *str1, 737 const svn_stringbuf_t *str2) 738{ 739 return string_compare(str1->data, str2->data, str1->len, str2->len); 740} 741 742 743 744apr_size_t 745svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str) 746{ 747 return string_first_non_whitespace(str->data, str->len); 748} 749 750 751void 752svn_stringbuf_strip_whitespace(svn_stringbuf_t *str) 753{ 754 /* Find first non-whitespace character */ 755 apr_size_t offset = svn_stringbuf_first_non_whitespace(str); 756 757 /* Go ahead! Waste some RAM, we've got pools! :) */ 758 str->data += offset; 759 str->len -= offset; 760 str->blocksize -= offset; 761 762 /* Now that we've trimmed the front, trim the end, wasting more RAM. 
*/ 763 while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1])) 764 str->len--; 765 str->data[str->len] = '\0'; 766} 767 768 769apr_size_t 770svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch) 771{ 772 return find_char_backward(str->data, str->len, ch); 773} 774 775 776svn_boolean_t 777svn_string_compare_stringbuf(const svn_string_t *str1, 778 const svn_stringbuf_t *str2) 779{ 780 return string_compare(str1->data, str2->data, str1->len, str2->len); 781} 782 783 784 785/*** C string stuff. ***/ 786 787void 788svn_cstring_split_append(apr_array_header_t *array, 789 const char *input, 790 const char *sep_chars, 791 svn_boolean_t chop_whitespace, 792 apr_pool_t *pool) 793{ 794 char *pats; 795 char *p; 796 797 pats = apr_pstrdup(pool, input); /* strtok wants non-const data */ 798 p = svn_cstring_tokenize(sep_chars, &pats); 799 800 while (p) 801 { 802 if (chop_whitespace) 803 { 804 while (svn_ctype_isspace(*p)) 805 p++; 806 807 { 808 char *e = p + (strlen(p) - 1); 809 while ((e >= p) && (svn_ctype_isspace(*e))) 810 e--; 811 *(++e) = '\0'; 812 } 813 } 814 815 if (p[0] != '\0') 816 APR_ARRAY_PUSH(array, const char *) = p; 817 818 p = svn_cstring_tokenize(sep_chars, &pats); 819 } 820 821 return; 822} 823 824 825apr_array_header_t * 826svn_cstring_split(const char *input, 827 const char *sep_chars, 828 svn_boolean_t chop_whitespace, 829 apr_pool_t *pool) 830{ 831 apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input)); 832 svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool); 833 return a; 834} 835 836 837svn_boolean_t svn_cstring_match_glob_list(const char *str, 838 const apr_array_header_t *list) 839{ 840 int i; 841 842 for (i = 0; i < list->nelts; i++) 843 { 844 const char *this_pattern = APR_ARRAY_IDX(list, i, char *); 845 846 if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS) 847 return TRUE; 848 } 849 850 return FALSE; 851} 852 853svn_boolean_t 854svn_cstring_match_list(const char *str, const apr_array_header_t 
*list) 855{ 856 int i; 857 858 for (i = 0; i < list->nelts; i++) 859 { 860 const char *this_str = APR_ARRAY_IDX(list, i, char *); 861 862 if (strcmp(this_str, str) == 0) 863 return TRUE; 864 } 865 866 return FALSE; 867} 868 869char * 870svn_cstring_tokenize(const char *sep, char **str) 871{ 872 char *token; 873 char *next; 874 char csep; 875 876 /* check parameters */ 877 if ((sep == NULL) || (str == NULL) || (*str == NULL)) 878 return NULL; 879 880 /* let APR handle edge cases and multiple separators */ 881 csep = *sep; 882 if (csep == '\0' || sep[1] != '\0') 883 return apr_strtok(NULL, sep, str); 884 885 /* skip characters in sep (will terminate at '\0') */ 886 token = *str; 887 while (*token == csep) 888 ++token; 889 890 if (!*token) /* no more tokens */ 891 return NULL; 892 893 /* skip valid token characters to terminate token and 894 * prepare for the next call (will terminate at '\0) 895 */ 896 next = strchr(token, csep); 897 if (next == NULL) 898 { 899 *str = token + strlen(token); 900 } 901 else 902 { 903 *next = '\0'; 904 *str = next + 1; 905 } 906 907 return token; 908} 909 910int svn_cstring_count_newlines(const char *msg) 911{ 912 int count = 0; 913 const char *p; 914 915 for (p = msg; *p; p++) 916 { 917 if (*p == '\n') 918 { 919 count++; 920 if (*(p + 1) == '\r') 921 p++; 922 } 923 else if (*p == '\r') 924 { 925 count++; 926 if (*(p + 1) == '\n') 927 p++; 928 } 929 } 930 931 return count; 932} 933 934char * 935svn_cstring_join(const apr_array_header_t *strings, 936 const char *separator, 937 apr_pool_t *pool) 938{ 939 svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool); 940 size_t sep_len = strlen(separator); 941 int i; 942 943 for (i = 0; i < strings->nelts; i++) 944 { 945 const char *string = APR_ARRAY_IDX(strings, i, const char *); 946 svn_stringbuf_appendbytes(new_str, string, strlen(string)); 947 svn_stringbuf_appendbytes(new_str, separator, sep_len); 948 } 949 return new_str->data; 950} 951 952int 953svn_cstring_casecmp(const char *str1, 
const char *str2) 954{ 955 for (;;) 956 { 957 const int a = *str1++; 958 const int b = *str2++; 959 const int cmp = svn_ctype_casecmp(a, b); 960 if (cmp || !a || !b) 961 return cmp; 962 } 963} 964 965svn_error_t * 966svn_cstring_strtoui64(apr_uint64_t *n, const char *str, 967 apr_uint64_t minval, apr_uint64_t maxval, 968 int base) 969{ 970 apr_int64_t val; 971 char *endptr; 972 973 /* We assume errno is thread-safe. */ 974 errno = 0; /* APR-0.9 doesn't always set errno */ 975 976 /* ### We're throwing away half the number range here. 977 * ### APR needs a apr_strtoui64() function. */ 978 val = apr_strtoi64(str, &endptr, base); 979 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0') 980 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 981 _("Could not convert '%s' into a number"), 982 str); 983 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) || 984 val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval) 985 /* ### Mark this for translation when gettext doesn't choke on macros. */ 986 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 987 "Number '%s' is out of range " 988 "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'", 989 str, minval, maxval); 990 *n = val; 991 return SVN_NO_ERROR; 992} 993 994svn_error_t * 995svn_cstring_atoui64(apr_uint64_t *n, const char *str) 996{ 997 return svn_error_trace(svn_cstring_strtoui64(n, str, 0, 998 APR_UINT64_MAX, 10)); 999} 1000 1001svn_error_t * 1002svn_cstring_atoui(unsigned int *n, const char *str) 1003{ 1004 apr_uint64_t val; 1005 1006 SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10)); 1007 *n = (unsigned int)val; 1008 return SVN_NO_ERROR; 1009} 1010 1011svn_error_t * 1012svn_cstring_strtoi64(apr_int64_t *n, const char *str, 1013 apr_int64_t minval, apr_int64_t maxval, 1014 int base) 1015{ 1016 apr_int64_t val; 1017 char *endptr; 1018 1019 /* We assume errno is thread-safe. 
*/ 1020 errno = 0; /* APR-0.9 doesn't always set errno */ 1021 1022 val = apr_strtoi64(str, &endptr, base); 1023 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0') 1024 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 1025 _("Could not convert '%s' into a number"), 1026 str); 1027 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) || 1028 val < minval || val > maxval) 1029 /* ### Mark this for translation when gettext doesn't choke on macros. */ 1030 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 1031 "Number '%s' is out of range " 1032 "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'", 1033 str, minval, maxval); 1034 *n = val; 1035 return SVN_NO_ERROR; 1036} 1037 1038svn_error_t * 1039svn_cstring_atoi64(apr_int64_t *n, const char *str) 1040{ 1041 return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN, 1042 APR_INT64_MAX, 10)); 1043} 1044 1045svn_error_t * 1046svn_cstring_atoi(int *n, const char *str) 1047{ 1048 apr_int64_t val; 1049 1050 SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10)); 1051 *n = (int)val; 1052 return SVN_NO_ERROR; 1053} 1054 1055unsigned long 1056svn__strtoul(const char* buffer, const char** end) 1057{ 1058 unsigned long result = 0; 1059 1060 /* this loop will execute in just 2 CPU cycles, confirmed by measurement: 1061 7 macro-ops (max 4 / cycle => 2 cycles) 1062 1 load (max 1 / cycle) 1063 1 jumps (compare + conditional jump == 1 macro op; max 1 / cycle) 1064 2 arithmetic ops (subtract, increment; max 3 / cycle) 1065 2 scale-and-add AGU ops (max 3 / cycle) 1066 1 compiler-generated move operation 1067 dependency chain: temp = result * 4 + result; result = temp * 2 + c 1068 (2 ops with latency 1 => 2 cycles) 1069 */ 1070 while (1) 1071 { 1072 unsigned long c = (unsigned char)*buffer - (unsigned char)'0'; 1073 if (c > 9) 1074 break; 1075 1076 result = result * 10 + c; 1077 ++buffer; 1078 } 1079 1080 *end = buffer; 1081 return result; 1082} 1083 
1084/* "Precalculated" itoa values for 2 places (including leading zeros). 1085 * For maximum performance, make sure all table entries are word-aligned. 1086 */ 1087static const char decimal_table[100][4] 1088 = { "00", "01", "02", "03", "04", "05", "06", "07", "08", "09" 1089 , "10", "11", "12", "13", "14", "15", "16", "17", "18", "19" 1090 , "20", "21", "22", "23", "24", "25", "26", "27", "28", "29" 1091 , "30", "31", "32", "33", "34", "35", "36", "37", "38", "39" 1092 , "40", "41", "42", "43", "44", "45", "46", "47", "48", "49" 1093 , "50", "51", "52", "53", "54", "55", "56", "57", "58", "59" 1094 , "60", "61", "62", "63", "64", "65", "66", "67", "68", "69" 1095 , "70", "71", "72", "73", "74", "75", "76", "77", "78", "79" 1096 , "80", "81", "82", "83", "84", "85", "86", "87", "88", "89" 1097 , "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"}; 1098 1099/* Copy the two bytes at SOURCE[0] and SOURCE[1] to DEST[0] and DEST[1] */ 1100#define COPY_TWO_BYTES(dest,source)\ 1101 memcpy((dest), (source), 2) 1102 1103apr_size_t 1104svn__ui64toa(char * dest, apr_uint64_t number) 1105{ 1106 char buffer[SVN_INT64_BUFFER_SIZE]; 1107 apr_uint32_t reduced; /* used for 32 bit DIV */ 1108 char* target; 1109 1110 /* Small numbers are by far the most common case. 1111 * Therefore, we use special code. 1112 */ 1113 if (number < 100) 1114 { 1115 if (number < 10) 1116 { 1117 dest[0] = (char)('0' + number); 1118 dest[1] = 0; 1119 return 1; 1120 } 1121 else 1122 { 1123 COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]); 1124 dest[2] = 0; 1125 return 2; 1126 } 1127 } 1128 1129 /* Standard code. Write string in pairs of chars back-to-front */ 1130 buffer[SVN_INT64_BUFFER_SIZE - 1] = 0; 1131 target = &buffer[SVN_INT64_BUFFER_SIZE - 3]; 1132 1133 /* Loop may be executed 0 .. 2 times. */ 1134 while (number >= 100000000) 1135 { 1136 /* Number is larger than 100^4, i.e. we can write 4x2 chars. 1137 * Also, use 32 bit DIVs as these are about twice as fast. 
1138 */ 1139 reduced = (apr_uint32_t)(number % 100000000); 1140 number /= 100000000; 1141 1142 COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]); 1143 reduced /= 100; 1144 COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]); 1145 reduced /= 100; 1146 COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]); 1147 reduced /= 100; 1148 COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]); 1149 target -= 8; 1150 } 1151 1152 /* Now, the number fits into 32 bits, but may still be larger than 99 */ 1153 reduced = (apr_uint32_t)(number); 1154 while (reduced >= 100) 1155 { 1156 COPY_TWO_BYTES(target, decimal_table[reduced % 100]); 1157 reduced /= 100; 1158 target -= 2; 1159 } 1160 1161 /* The number is now smaller than 100 but larger than 1 */ 1162 COPY_TWO_BYTES(target, decimal_table[reduced]); 1163 1164 /* Correction for uneven count of places. */ 1165 if (reduced < 10) 1166 ++target; 1167 1168 /* Copy to target */ 1169 memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target); 1170 return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1; 1171} 1172 1173apr_size_t 1174svn__i64toa(char * dest, apr_int64_t number) 1175{ 1176 if (number >= 0) 1177 return svn__ui64toa(dest, (apr_uint64_t)number); 1178 1179 *dest = '-'; 1180 return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1; 1181} 1182 1183static void 1184ui64toa_sep(apr_uint64_t number, char separator, char *buffer) 1185{ 1186 apr_size_t length = svn__ui64toa(buffer, number); 1187 apr_size_t i; 1188 1189 for (i = length; i > 3; i -= 3) 1190 { 1191 memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3); 1192 buffer[i-3] = separator; 1193 length++; 1194 } 1195 1196 buffer[length] = 0; 1197} 1198 1199char * 1200svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool) 1201{ 1202 char buffer[2 * SVN_INT64_BUFFER_SIZE]; 1203 ui64toa_sep(number, separator, buffer); 1204 1205 return apr_pstrdup(pool, buffer); 1206} 1207 1208char * 1209svn__i64toa_sep(apr_int64_t number, char separator, 
apr_pool_t *pool) 1210{ 1211 char buffer[2 * SVN_INT64_BUFFER_SIZE]; 1212 if (number < 0) 1213 { 1214 buffer[0] = '-'; 1215 ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]); 1216 } 1217 else 1218 ui64toa_sep((apr_uint64_t)(number), separator, buffer); 1219 1220 return apr_pstrdup(pool, buffer); 1221} 1222 1223apr_size_t 1224svn__ui64tobase36(char *dest, apr_uint64_t value) 1225{ 1226 char *dest_start = dest; 1227 if (value < 10) 1228 { 1229 /* pretty frequent and trivial case. Make it fast. */ 1230 *(dest++) = (char)(value) + '0'; 1231 } 1232 else 1233 { 1234 char buffer[SVN_INT64_BUFFER_SIZE]; 1235 char *p = buffer; 1236 1237 /* write result as little-endian to buffer */ 1238 while (value > 0) 1239 { 1240 char c = (char)(value % 36); 1241 value /= 36; 1242 1243 *p = (c <= 9) ? (c + '0') : (c - 10 + 'a'); 1244 ++p; 1245 } 1246 1247 /* copy as big-endian to DEST */ 1248 while (p > buffer) 1249 *(dest++) = *(--p); 1250 } 1251 1252 *dest = '\0'; 1253 return dest - dest_start; 1254} 1255 1256apr_uint64_t 1257svn__base36toui64(const char **next, const char *source) 1258{ 1259 apr_uint64_t result = 0; 1260 apr_uint64_t factor = 1; 1261 int i = 0; 1262 char digits[SVN_INT64_BUFFER_SIZE]; 1263 1264 /* convert digits to numerical values and count the number of places. 1265 * Also, prevent buffer overflow. 
*/ 1266 while (i < sizeof(digits)) 1267 { 1268 char c = *source; 1269 if (c < 'a') 1270 { 1271 /* includes detection of NUL terminator */ 1272 if (c < '0' || c > '9') 1273 break; 1274 1275 c -= '0'; 1276 } 1277 else 1278 { 1279 if (c < 'a' || c > 'z') 1280 break; 1281 1282 c -= 'a' - 10; 1283 } 1284 1285 digits[i++] = c; 1286 source++; 1287 } 1288 1289 /* fold digits into the result */ 1290 while (i > 0) 1291 { 1292 result += factor * (apr_uint64_t)digits[--i]; 1293 factor *= 36; 1294 } 1295 1296 if (next) 1297 *next = source; 1298 1299 return result; 1300} 1301 1302 1303apr_size_t 1304svn_cstring__similarity(const char *stra, const char *strb, 1305 svn_membuf_t *buffer, apr_size_t *rlcs) 1306{ 1307 svn_string_t stringa, stringb; 1308 stringa.data = stra; 1309 stringa.len = strlen(stra); 1310 stringb.data = strb; 1311 stringb.len = strlen(strb); 1312 return svn_string__similarity(&stringa, &stringb, buffer, rlcs); 1313} 1314 1315apr_size_t 1316svn_string__similarity(const svn_string_t *stringa, 1317 const svn_string_t *stringb, 1318 svn_membuf_t *buffer, apr_size_t *rlcs) 1319{ 1320 const char *stra = stringa->data; 1321 const char *strb = stringb->data; 1322 const apr_size_t lena = stringa->len; 1323 const apr_size_t lenb = stringb->len; 1324 const apr_size_t total = lena + lenb; 1325 const char *enda = stra + lena; 1326 const char *endb = strb + lenb; 1327 apr_size_t lcs = 0; 1328 1329 /* Skip the common prefix ... */ 1330 while (stra < enda && strb < endb && *stra == *strb) 1331 { 1332 ++stra; ++strb; 1333 ++lcs; 1334 } 1335 1336 /* ... and the common suffix */ 1337 while (stra < enda && strb < endb) 1338 { 1339 --enda; --endb; 1340 if (*enda != *endb) 1341 { 1342 ++enda; ++endb; 1343 break; 1344 } 1345 1346 ++lcs; 1347 } 1348 1349 if (stra < enda && strb < endb) 1350 { 1351 const apr_size_t resta = enda - stra; 1352 const apr_size_t restb = endb - strb; 1353 const apr_size_t slots = (resta > restb ? 
restb : resta); 1354 apr_size_t *curr, *prev; 1355 const char *pstr; 1356 1357 /* The outer loop must iterate on the longer string. */ 1358 if (resta < restb) 1359 { 1360 pstr = stra; 1361 stra = strb; 1362 strb = pstr; 1363 1364 pstr = enda; 1365 enda = endb; 1366 endb = pstr; 1367 } 1368 1369 /* Allocate two columns in the LCS matrix 1370 ### Optimize this to (slots + 2) instesd of 2 * (slots + 1) */ 1371 svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t)); 1372 svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t)); 1373 prev = buffer->data; 1374 curr = prev + slots + 1; 1375 1376 /* Calculate LCS length of the remainder */ 1377 for (pstr = stra; pstr < enda; ++pstr) 1378 { 1379 apr_size_t i; 1380 for (i = 1; i <= slots; ++i) 1381 { 1382 if (*pstr == strb[i-1]) 1383 curr[i] = prev[i-1] + 1; 1384 else 1385 curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]); 1386 } 1387 1388 /* Swap the buffers, making the previous one current */ 1389 { 1390 apr_size_t *const temp = prev; 1391 prev = curr; 1392 curr = temp; 1393 } 1394 } 1395 1396 lcs += prev[slots]; 1397 } 1398 1399 if (rlcs) 1400 *rlcs = lcs; 1401 1402 /* Return similarity ratio rounded to 4 significant digits */ 1403 if (total) 1404 return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total); 1405 else 1406 return SVN_STRING__SIM_RANGE_MAX; 1407} 1408 1409apr_size_t 1410svn_cstring__match_length(const char *a, 1411 const char *b, 1412 apr_size_t max_len) 1413{ 1414 apr_size_t pos = 0; 1415 1416#if SVN_UNALIGNED_ACCESS_IS_OK 1417 1418 /* Chunky processing is so much faster ... 1419 * 1420 * We can't make this work on architectures that require aligned access 1421 * because A and B will probably have different alignment. So, skipping 1422 * the first few chars until alignment is reached is not an option. 
1423 */ 1424 for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) 1425 if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) 1426 break; 1427 1428#endif 1429 1430 for (; pos < max_len; ++pos) 1431 if (a[pos] != b[pos]) 1432 break; 1433 1434 return pos; 1435} 1436 1437apr_size_t 1438svn_cstring__reverse_match_length(const char *a, 1439 const char *b, 1440 apr_size_t max_len) 1441{ 1442 apr_size_t pos = 0; 1443 1444#if SVN_UNALIGNED_ACCESS_IS_OK 1445 1446 /* Chunky processing is so much faster ... 1447 * 1448 * We can't make this work on architectures that require aligned access 1449 * because A and B will probably have different alignment. So, skipping 1450 * the first few chars until alignment is reached is not an option. 1451 */ 1452 for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) 1453 if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) 1454 break; 1455 1456 pos -= sizeof(apr_size_t); 1457 1458#endif 1459 1460 /* If we find a mismatch at -pos, pos-1 characters matched. 1461 */ 1462 while (++pos <= max_len) 1463 if (a[0-pos] != b[0-pos]) 1464 return pos - 1; 1465 1466 /* No mismatch found -> at least MAX_LEN matching chars. 1467 */ 1468 return max_len; 1469} 1470 1471const char * 1472svn_cstring_skip_prefix(const char *str, const char *prefix) 1473{ 1474 apr_size_t len = strlen(prefix); 1475 1476 if (strncmp(str, prefix, len) == 0) 1477 { 1478 return str + len; 1479 } 1480 else 1481 { 1482 return NULL; 1483 } 1484} 1485