string.c revision 289166
1/*
2 * string.c:  routines to manipulate counted-length strings
3 *            (svn_stringbuf_t and svn_string_t) and C strings.
4 *
5 *
6 * ====================================================================
7 *    Licensed to the Apache Software Foundation (ASF) under one
8 *    or more contributor license agreements.  See the NOTICE file
9 *    distributed with this work for additional information
10 *    regarding copyright ownership.  The ASF licenses this file
11 *    to you under the Apache License, Version 2.0 (the
12 *    "License"); you may not use this file except in compliance
13 *    with the License.  You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 *    Unless required by applicable law or agreed to in writing,
18 *    software distributed under the License is distributed on an
19 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 *    KIND, either express or implied.  See the License for the
21 *    specific language governing permissions and limitations
22 *    under the License.
23 * ====================================================================
24 */
25
26
27
28#include <apr.h>
29
30#include <string.h>      /* for memcpy(), memcmp(), strlen() */
31#include <apr_fnmatch.h>
32#include "svn_string.h"  /* loads "svn_types.h" and <apr_pools.h> */
33#include "svn_ctype.h"
34#include "private/svn_dep_compat.h"
35#include "private/svn_string_private.h"
36
37#include "svn_private_config.h"
38
39
40
41/* Allocate the space for a memory buffer from POOL.
42 * Return a pointer to the new buffer in *DATA and its size in *SIZE.
43 * The buffer size will be at least MINIMUM_SIZE.
44 *
45 * N.B.: The stringbuf creation functions use this, but since stringbufs
46 *       always consume at least 1 byte for the NUL terminator, the
47 *       resulting data pointers will never be NULL.
48 */
49static APR_INLINE void
50membuf_create(void **data, apr_size_t *size,
51              apr_size_t minimum_size, apr_pool_t *pool)
52{
53  /* apr_palloc will allocate multiples of 8.
54   * Thus, we would waste some of that memory if we stuck to the
55   * smaller size. Note that this is safe even if apr_palloc would
56   * use some other aligment or none at all. */
57  minimum_size = APR_ALIGN_DEFAULT(minimum_size);
58  *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size));
59  *size = minimum_size;
60}
61
62/* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE
63 * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE,
64 * this function does nothing.
65 *
66 * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE
67 * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE
68 * will be multiplied by a power of two such that the result is
69 * greater or equal to MINIMUM_SIZE. The pointer to the new buffer
70 * will be returned in *DATA, and its size in *SIZE.
71 */
72static APR_INLINE void
73membuf_ensure(void **data, apr_size_t *size,
74              apr_size_t minimum_size, apr_pool_t *pool)
75{
76  if (minimum_size > *size)
77    {
78      apr_size_t new_size = *size;
79
80      if (new_size == 0)
81        /* APR will increase odd allocation sizes to the next
82         * multiple for 8, for instance. Take advantage of that
83         * knowledge and allow for the extra size to be used. */
84        new_size = minimum_size;
85      else
86        while (new_size < minimum_size)
87          {
88            /* new_size is aligned; doubling it should keep it aligned */
89            const apr_size_t prev_size = new_size;
90            new_size *= 2;
91
92            /* check for apr_size_t overflow */
93            if (prev_size > new_size)
94              {
95                new_size = minimum_size;
96                break;
97              }
98          }
99
100      membuf_create(data, size, new_size, pool);
101    }
102}
103
104void
105svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool)
106{
107  membuf_create(&membuf->data, &membuf->size, size, pool);
108  membuf->pool = pool;
109}
110
111void
112svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size)
113{
114  membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
115}
116
117void
118svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
119{
120  const void *const old_data = membuf->data;
121  const apr_size_t old_size = membuf->size;
122
123  membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
124  if (membuf->data && old_data && old_data != membuf->data)
125    memcpy(membuf->data, old_data, old_size);
126}
127
128/* Always provide an out-of-line implementation of svn_membuf__zero */
129#undef svn_membuf__zero
130void
131svn_membuf__zero(svn_membuf_t *membuf)
132{
133  SVN_MEMBUF__ZERO(membuf);
134}
135
136/* Always provide an out-of-line implementation of svn_membuf__nzero */
137#undef svn_membuf__nzero
138void
139svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size)
140{
141  SVN_MEMBUF__NZERO(membuf, size);
142}
143
144static APR_INLINE svn_boolean_t
145string_compare(const char *str1,
146               const char *str2,
147               apr_size_t len1,
148               apr_size_t len2)
149{
150  /* easy way out :)  */
151  if (len1 != len2)
152    return FALSE;
153
154  /* now the strings must have identical lenghths */
155
156  if ((memcmp(str1, str2, len1)) == 0)
157    return TRUE;
158  else
159    return FALSE;
160}
161
162static APR_INLINE apr_size_t
163string_first_non_whitespace(const char *str, apr_size_t len)
164{
165  apr_size_t i;
166
167  for (i = 0; i < len; i++)
168    {
169      if (! svn_ctype_isspace(str[i]))
170        return i;
171    }
172
173  /* if we get here, then the string must be entirely whitespace */
174  return len;
175}
176
177static APR_INLINE apr_size_t
178find_char_backward(const char *str, apr_size_t len, char ch)
179{
180  apr_size_t i = len;
181
182  while (i != 0)
183    {
184      if (str[--i] == ch)
185        return i;
186    }
187
188  /* char was not found, return len */
189  return len;
190}
191
192
193/* svn_string functions */
194
195/* Return a new svn_string_t object, allocated in POOL, initialized with
196 * DATA and SIZE.  Do not copy the contents of DATA, just store the pointer.
197 * SIZE is the length in bytes of DATA, excluding the required NUL
198 * terminator. */
199static svn_string_t *
200create_string(const char *data, apr_size_t size,
201              apr_pool_t *pool)
202{
203  svn_string_t *new_string;
204
205  new_string = apr_palloc(pool, sizeof(*new_string));
206
207  new_string->data = data;
208  new_string->len = size;
209
210  return new_string;
211}
212
213/* A data buffer for a zero-length string (just a null terminator).  Many
214 * svn_string_t instances may share this same buffer. */
215static const char empty_buffer[1] = {0};
216
217svn_string_t *
218svn_string_create_empty(apr_pool_t *pool)
219{
220  svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string));
221  new_string->data = empty_buffer;
222  new_string->len = 0;
223
224  return new_string;
225}
226
227
228svn_string_t *
229svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
230{
231  void *mem;
232  char *data;
233  svn_string_t *new_string;
234
235  /* Allocate memory for svn_string_t and data in one chunk. */
236  mem = apr_palloc(pool, sizeof(*new_string) + size + 1);
237  data = (char*)mem + sizeof(*new_string);
238
239  new_string = mem;
240  new_string->data = data;
241  new_string->len = size;
242
243  memcpy(data, bytes, size);
244
245  /* Null termination is the convention -- even if we suspect the data
246     to be binary, it's not up to us to decide, it's the caller's
247     call.  Heck, that's why they call it the caller! */
248  data[size] = '\0';
249
250  return new_string;
251}
252
253
254svn_string_t *
255svn_string_create(const char *cstring, apr_pool_t *pool)
256{
257  return svn_string_ncreate(cstring, strlen(cstring), pool);
258}
259
260
261svn_string_t *
262svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool)
263{
264  return svn_string_ncreate(strbuf->data, strbuf->len, pool);
265}
266
267
268svn_string_t *
269svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap)
270{
271  char *data = apr_pvsprintf(pool, fmt, ap);
272
273  /* wrap an svn_string_t around the new data */
274  return create_string(data, strlen(data), pool);
275}
276
277
278svn_string_t *
279svn_string_createf(apr_pool_t *pool, const char *fmt, ...)
280{
281  svn_string_t *str;
282
283  va_list ap;
284  va_start(ap, fmt);
285  str = svn_string_createv(pool, fmt, ap);
286  va_end(ap);
287
288  return str;
289}
290
291
292svn_boolean_t
293svn_string_isempty(const svn_string_t *str)
294{
295  return (str->len == 0);
296}
297
298
299svn_string_t *
300svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
301{
302  return (svn_string_ncreate(original_string->data,
303                             original_string->len, pool));
304}
305
306
307
308svn_boolean_t
309svn_string_compare(const svn_string_t *str1, const svn_string_t *str2)
310{
311  return
312    string_compare(str1->data, str2->data, str1->len, str2->len);
313}
314
315
316
317apr_size_t
318svn_string_first_non_whitespace(const svn_string_t *str)
319{
320  return
321    string_first_non_whitespace(str->data, str->len);
322}
323
324
325apr_size_t
326svn_string_find_char_backward(const svn_string_t *str, char ch)
327{
328  return find_char_backward(str->data, str->len, ch);
329}
330
331svn_string_t *
332svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf)
333{
334  /* In debug mode, detect attempts to modify the original STRBUF object.
335   */
336#ifdef SVN_DEBUG
337  strbuf->pool = NULL;
338  strbuf->blocksize = strbuf->len + 1;
339#endif
340
341  /* Both, svn_string_t and svn_stringbuf_t are public API structures
342   * since the svn epoch. Thus, we can rely on their precise layout not
343   * to change.
344   *
345   * It just so happens that svn_string_t is structurally equivalent
346   * to the (data, len) sub-set of svn_stringbuf_t. There is also no
347   * difference in alignment and padding. So, we can just re-interpret
348   * that part of STRBUF as a svn_string_t.
349   *
350   * However, since svn_string_t does not know about the blocksize
351   * member in svn_stringbuf_t, any attempt to re-size the returned
352   * svn_string_t might invalidate the STRBUF struct. Hence, we consider
353   * the source STRBUF "consumed".
354   *
355   * Modifying the string character content is fine, though.
356   */
357  return (svn_string_t *)&strbuf->data;
358}
359
360
361
362/* svn_stringbuf functions */
363
364svn_stringbuf_t *
365svn_stringbuf_create_empty(apr_pool_t *pool)
366{
367  return svn_stringbuf_create_ensure(0, pool);
368}
369
370svn_stringbuf_t *
371svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool)
372{
373  void *mem;
374  svn_stringbuf_t *new_string;
375
376  ++blocksize; /* + space for '\0' */
377
378  /* Allocate memory for svn_string_t and data in one chunk. */
379  membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool);
380
381  /* Initialize header and string */
382  new_string = mem;
383  new_string->data = (char*)mem + sizeof(*new_string);
384  new_string->data[0] = '\0';
385  new_string->len = 0;
386  new_string->blocksize = blocksize - sizeof(*new_string);
387  new_string->pool = pool;
388
389  return new_string;
390}
391
392svn_stringbuf_t *
393svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
394{
395  svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
396  memcpy(strbuf->data, bytes, size);
397
398  /* Null termination is the convention -- even if we suspect the data
399     to be binary, it's not up to us to decide, it's the caller's
400     call.  Heck, that's why they call it the caller! */
401  strbuf->data[size] = '\0';
402  strbuf->len = size;
403
404  return strbuf;
405}
406
407
408svn_stringbuf_t *
409svn_stringbuf_create(const char *cstring, apr_pool_t *pool)
410{
411  return svn_stringbuf_ncreate(cstring, strlen(cstring), pool);
412}
413
414
415svn_stringbuf_t *
416svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
417{
418  return svn_stringbuf_ncreate(str->data, str->len, pool);
419}
420
421
422svn_stringbuf_t *
423svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
424{
425  char *data = apr_pvsprintf(pool, fmt, ap);
426  apr_size_t size = strlen(data);
427  svn_stringbuf_t *new_string;
428
429  new_string = apr_palloc(pool, sizeof(*new_string));
430  new_string->data = data;
431  new_string->len = size;
432  new_string->blocksize = size + 1;
433  new_string->pool = pool;
434
435  return new_string;
436}
437
438
439svn_stringbuf_t *
440svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...)
441{
442  svn_stringbuf_t *str;
443
444  va_list ap;
445  va_start(ap, fmt);
446  str = svn_stringbuf_createv(pool, fmt, ap);
447  va_end(ap);
448
449  return str;
450}
451
452
453void
454svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c)
455{
456  memset(str->data, c, str->len);
457}
458
459
460void
461svn_stringbuf_set(svn_stringbuf_t *str, const char *value)
462{
463  apr_size_t amt = strlen(value);
464
465  svn_stringbuf_ensure(str, amt);
466  memcpy(str->data, value, amt + 1);
467  str->len = amt;
468}
469
470void
471svn_stringbuf_setempty(svn_stringbuf_t *str)
472{
473  if (str->len > 0)
474    str->data[0] = '\0';
475
476  str->len = 0;
477}
478
479
480void
481svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes)
482{
483  if (nbytes > str->len)
484    str->len = 0;
485  else
486    str->len -= nbytes;
487
488  str->data[str->len] = '\0';
489}
490
491
492svn_boolean_t
493svn_stringbuf_isempty(const svn_stringbuf_t *str)
494{
495  return (str->len == 0);
496}
497
498
499void
500svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size)
501{
502  void *mem = NULL;
503  ++minimum_size;  /* + space for '\0' */
504
505  membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool);
506  if (mem && mem != str->data)
507    {
508      if (str->data)
509        memcpy(mem, str->data, str->len + 1);
510      str->data = mem;
511    }
512}
513
514
515/* WARNING - Optimized code ahead!
516 * This function has been hand-tuned for performance. Please read
517 * the comments below before modifying the code.
518 */
519void
520svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte)
521{
522  char *dest;
523  apr_size_t old_len = str->len;
524
525  /* In most cases, there will be pre-allocated memory left
526   * to just write the new byte at the end of the used section
527   * and terminate the string properly.
528   */
529  if (str->blocksize > old_len + 1)
530    {
531      /* The following read does not depend this write, so we
532       * can issue the write first to minimize register pressure:
533       * The value of old_len+1 is no longer needed; on most processors,
534       * dest[old_len+1] will be calculated implicitly as part of
535       * the addressing scheme.
536       */
537      str->len = old_len+1;
538
539      /* Since the compiler cannot be sure that *src->data and *src
540       * don't overlap, we read src->data *once* before writing
541       * to *src->data. Replacing dest with str->data would force
542       * the compiler to read it again after the first byte.
543       */
544      dest = str->data;
545
546      /* If not already available in a register as per ABI, load
547       * "byte" into the register (e.g. the one freed from old_len+1),
548       * then write it to the string buffer and terminate it properly.
549       *
550       * Including the "byte" fetch, all operations so far could be
551       * issued at once and be scheduled at the CPU's descression.
552       * Most likely, no-one will soon depend on the data that will be
553       * written in this function. So, no stalls there, either.
554       */
555      dest[old_len] = byte;
556      dest[old_len+1] = '\0';
557    }
558  else
559    {
560      /* we need to re-allocate the string buffer
561       * -> let the more generic implementation take care of that part
562       */
563
564      /* Depending on the ABI, "byte" is a register value. If we were
565       * to take its address directly, the compiler might decide to
566       * put in on the stack *unconditionally*, even if that would
567       * only be necessary for this block.
568       */
569      char b = byte;
570      svn_stringbuf_appendbytes(str, &b, 1);
571    }
572}
573
574
575void
576svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
577                          apr_size_t count)
578{
579  apr_size_t total_len;
580  void *start_address;
581
582  total_len = str->len + count;  /* total size needed */
583
584  /* svn_stringbuf_ensure adds 1 for null terminator. */
585  svn_stringbuf_ensure(str, total_len);
586
587  /* get address 1 byte beyond end of original bytestring */
588  start_address = (str->data + str->len);
589
590  memcpy(start_address, bytes, count);
591  str->len = total_len;
592
593  str->data[str->len] = '\0';  /* We don't know if this is binary
594                                  data or not, but convention is
595                                  to null-terminate. */
596}
597
598
599void
600svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
601                        const svn_stringbuf_t *appendstr)
602{
603  svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len);
604}
605
606
607void
608svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr)
609{
610  svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr));
611}
612
613void
614svn_stringbuf_insert(svn_stringbuf_t *str,
615                     apr_size_t pos,
616                     const char *bytes,
617                     apr_size_t count)
618{
619  if (bytes + count > str->data && bytes < str->data + str->blocksize)
620    {
621      /* special case: BYTES overlaps with this string -> copy the source */
622      const char *temp = apr_pmemdup(str->pool, bytes, count);
623      svn_stringbuf_insert(str, pos, temp, count);
624    }
625  else
626    {
627      if (pos > str->len)
628        pos = str->len;
629
630      svn_stringbuf_ensure(str, str->len + count);
631      memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
632      memcpy(str->data + pos, bytes, count);
633
634      str->len += count;
635    }
636}
637
638void
639svn_stringbuf_remove(svn_stringbuf_t *str,
640                     apr_size_t pos,
641                     apr_size_t count)
642{
643  if (pos > str->len)
644    pos = str->len;
645  if (pos + count > str->len)
646    count = str->len - pos;
647
648  memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
649  str->len -= count;
650}
651
652void
653svn_stringbuf_replace(svn_stringbuf_t *str,
654                      apr_size_t pos,
655                      apr_size_t old_count,
656                      const char *bytes,
657                      apr_size_t new_count)
658{
659  if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
660    {
661      /* special case: BYTES overlaps with this string -> copy the source */
662      const char *temp = apr_pmemdup(str->pool, bytes, new_count);
663      svn_stringbuf_replace(str, pos, old_count, temp, new_count);
664    }
665  else
666    {
667      if (pos > str->len)
668        pos = str->len;
669      if (pos + old_count > str->len)
670        old_count = str->len - pos;
671
672      if (old_count < new_count)
673        {
674          apr_size_t delta = new_count - old_count;
675          svn_stringbuf_ensure(str, str->len + delta);
676        }
677
678      if (old_count != new_count)
679        memmove(str->data + pos + new_count, str->data + pos + old_count,
680                str->len - pos - old_count + 1);
681
682      memcpy(str->data + pos, bytes, new_count);
683      str->len += new_count - old_count;
684    }
685}
686
687
688svn_stringbuf_t *
689svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool)
690{
691  return (svn_stringbuf_ncreate(original_string->data,
692                                original_string->len, pool));
693}
694
695
696
697svn_boolean_t
698svn_stringbuf_compare(const svn_stringbuf_t *str1,
699                      const svn_stringbuf_t *str2)
700{
701  return string_compare(str1->data, str2->data, str1->len, str2->len);
702}
703
704
705
706apr_size_t
707svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str)
708{
709  return string_first_non_whitespace(str->data, str->len);
710}
711
712
713void
714svn_stringbuf_strip_whitespace(svn_stringbuf_t *str)
715{
716  /* Find first non-whitespace character */
717  apr_size_t offset = svn_stringbuf_first_non_whitespace(str);
718
719  /* Go ahead!  Waste some RAM, we've got pools! :)  */
720  str->data += offset;
721  str->len -= offset;
722  str->blocksize -= offset;
723
724  /* Now that we've trimmed the front, trim the end, wasting more RAM. */
725  while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1]))
726    str->len--;
727  str->data[str->len] = '\0';
728}
729
730
731apr_size_t
732svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch)
733{
734  return find_char_backward(str->data, str->len, ch);
735}
736
737
738svn_boolean_t
739svn_string_compare_stringbuf(const svn_string_t *str1,
740                             const svn_stringbuf_t *str2)
741{
742  return string_compare(str1->data, str2->data, str1->len, str2->len);
743}
744
745
746
747/*** C string stuff. ***/
748
749void
750svn_cstring_split_append(apr_array_header_t *array,
751                         const char *input,
752                         const char *sep_chars,
753                         svn_boolean_t chop_whitespace,
754                         apr_pool_t *pool)
755{
756  char *pats;
757  char *p;
758
759  pats = apr_pstrdup(pool, input);  /* strtok wants non-const data */
760  p = svn_cstring_tokenize(sep_chars, &pats);
761
762  while (p)
763    {
764      if (chop_whitespace)
765        {
766          while (svn_ctype_isspace(*p))
767            p++;
768
769          {
770            char *e = p + (strlen(p) - 1);
771            while ((e >= p) && (svn_ctype_isspace(*e)))
772              e--;
773            *(++e) = '\0';
774          }
775        }
776
777      if (p[0] != '\0')
778        APR_ARRAY_PUSH(array, const char *) = p;
779
780      p = svn_cstring_tokenize(sep_chars, &pats);
781    }
782
783  return;
784}
785
786
787apr_array_header_t *
788svn_cstring_split(const char *input,
789                  const char *sep_chars,
790                  svn_boolean_t chop_whitespace,
791                  apr_pool_t *pool)
792{
793  apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
794  svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool);
795  return a;
796}
797
798
799svn_boolean_t svn_cstring_match_glob_list(const char *str,
800                                          const apr_array_header_t *list)
801{
802  int i;
803
804  for (i = 0; i < list->nelts; i++)
805    {
806      const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
807
808      if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
809        return TRUE;
810    }
811
812  return FALSE;
813}
814
815svn_boolean_t
816svn_cstring_match_list(const char *str, const apr_array_header_t *list)
817{
818  int i;
819
820  for (i = 0; i < list->nelts; i++)
821    {
822      const char *this_str = APR_ARRAY_IDX(list, i, char *);
823
824      if (strcmp(this_str, str) == 0)
825        return TRUE;
826    }
827
828  return FALSE;
829}
830
/* Return the next token of *STR as delimited by the characters in SEP,
 * or NULL if there is none (or if SEP, STR or *STR is NULL).
 *
 * The token is NUL-terminated in place and *STR is advanced behind it,
 * ready for the next call.  Only the case of exactly one separator
 * character is handled here; anything else is passed on to apr_strtok().
 */
char *
svn_cstring_tokenize(const char *sep, char **str)
{
    char *start;
    char *stop;
    char only_sep;

    /* reject unusable parameters */
    if (sep == NULL || str == NULL || *str == NULL)
        return NULL;

    /* an empty SEP or one with several characters is APR's business */
    only_sep = sep[0];
    if (only_sep == '\0' || sep[1] != '\0')
        return apr_strtok(NULL, sep, str);

    /* skip leading separators; ONLY_SEP is not NUL, so this loop
     * stops at the end of the string at the latest */
    for (start = *str; *start == only_sep; ++start)
        ;

    /* nothing but separators left -> no more tokens */
    if (*start == '\0')
        return NULL;

    /* find the end of the token, terminate it there and remember
     * where the next call has to resume */
    stop = strchr(start, only_sep);
    if (stop != NULL)
      {
        *stop = '\0';
        *str = stop + 1;
      }
    else
      {
        /* last token: resume at the terminating NUL */
        *str = start + strlen(start);
      }

    return start;
}
871
/* Return the number of line endings in the NUL-terminated MSG.
 * "\n", "\r", "\r\n" and "\n\r" each count as one line ending. */
int
svn_cstring_count_newlines(const char *msg)
{
  int newlines = 0;
  const char *cur = msg;

  while (*cur != '\0')
    {
      const char c = *cur++;

      if (c == '\n' || c == '\r')
        {
          /* the other end-of-line character; if it follows directly,
             it belongs to the same line ending */
          const char partner = (c == '\n') ? '\r' : '\n';

          newlines++;
          if (*cur == partner)
            cur++;
        }
    }

  return newlines;
}
895
896char *
897svn_cstring_join(const apr_array_header_t *strings,
898                 const char *separator,
899                 apr_pool_t *pool)
900{
901  svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
902  size_t sep_len = strlen(separator);
903  int i;
904
905  for (i = 0; i < strings->nelts; i++)
906    {
907      const char *string = APR_ARRAY_IDX(strings, i, const char *);
908      svn_stringbuf_appendbytes(new_str, string, strlen(string));
909      svn_stringbuf_appendbytes(new_str, separator, sep_len);
910    }
911  return new_str->data;
912}
913
/* Compare the C strings STR1 and STR2 character by character using
 * svn_ctype_casecmp() and return the result for the first pair of
 * characters that differs, or 0 if the end of either string is reached
 * without finding a difference. */
int
svn_cstring_casecmp(const char *str1, const char *str2)
{
  int a;
  int b;
  int cmp;

  do
    {
      a = *str1++;
      b = *str2++;
      cmp = svn_ctype_casecmp(a, b);
    }
  while (cmp == 0 && a != 0 && b != 0);

  return cmp;
}
926
927svn_error_t *
928svn_cstring_strtoui64(apr_uint64_t *n, const char *str,
929                      apr_uint64_t minval, apr_uint64_t maxval,
930                      int base)
931{
932  apr_int64_t val;
933  char *endptr;
934
935  /* We assume errno is thread-safe. */
936  errno = 0; /* APR-0.9 doesn't always set errno */
937
938  /* ### We're throwing away half the number range here.
939   * ### APR needs a apr_strtoui64() function. */
940  val = apr_strtoi64(str, &endptr, base);
941  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
942    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
943                             _("Could not convert '%s' into a number"),
944                             str);
945  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
946      val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
947    /* ### Mark this for translation when gettext doesn't choke on macros. */
948    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
949                             "Number '%s' is out of range "
950                             "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'",
951                             str, minval, maxval);
952  *n = val;
953  return SVN_NO_ERROR;
954}
955
956svn_error_t *
957svn_cstring_atoui64(apr_uint64_t *n, const char *str)
958{
959  return svn_error_trace(svn_cstring_strtoui64(n, str, 0,
960                                               APR_UINT64_MAX, 10));
961}
962
963svn_error_t *
964svn_cstring_atoui(unsigned int *n, const char *str)
965{
966  apr_uint64_t val;
967
968  SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10));
969  *n = (unsigned int)val;
970  return SVN_NO_ERROR;
971}
972
973svn_error_t *
974svn_cstring_strtoi64(apr_int64_t *n, const char *str,
975                     apr_int64_t minval, apr_int64_t maxval,
976                     int base)
977{
978  apr_int64_t val;
979  char *endptr;
980
981  /* We assume errno is thread-safe. */
982  errno = 0; /* APR-0.9 doesn't always set errno */
983
984  val = apr_strtoi64(str, &endptr, base);
985  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
986    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
987                             _("Could not convert '%s' into a number"),
988                             str);
989  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
990      val < minval || val > maxval)
991    /* ### Mark this for translation when gettext doesn't choke on macros. */
992    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
993                             "Number '%s' is out of range "
994                             "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'",
995                             str, minval, maxval);
996  *n = val;
997  return SVN_NO_ERROR;
998}
999
1000svn_error_t *
1001svn_cstring_atoi64(apr_int64_t *n, const char *str)
1002{
1003  return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN,
1004                                              APR_INT64_MAX, 10));
1005}
1006
1007svn_error_t *
1008svn_cstring_atoi(int *n, const char *str)
1009{
1010  apr_int64_t val;
1011
1012  SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10));
1013  *n = (int)val;
1014  return SVN_NO_ERROR;
1015}
1016
1017
1018apr_status_t
1019svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base)
1020{
1021#if !APR_VERSION_AT_LEAST(1,0,0)
1022  errno = 0;
1023  *offset = strtol(buf, end, base);
1024  return APR_FROM_OS_ERROR(errno);
1025#else
1026  return apr_strtoff(offset, buf, end, base);
1027#endif
1028}
1029
/* Lookup table with the two-digit decimal representation (leading zero
 * included) of every number from 0 to 99.  Each entry occupies 4 bytes
 * so that all entries are word-aligned, for maximum performance.
 */
static const char decimal_table[100][4] =
  {
    "00", "01", "02", "03", "04", "05", "06", "07", "08", "09",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
    "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
    "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
    "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
    "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
    "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
    "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
    "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
    "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"
  };

/* Copy exactly two bytes, SOURCE[0] and SOURCE[1], to DEST[0] and
 * DEST[1]. */
#define COPY_TWO_BYTES(dest,source)\
  memcpy((dest), (source), 2)
1048
1049apr_size_t
1050svn__ui64toa(char * dest, apr_uint64_t number)
1051{
1052  char buffer[SVN_INT64_BUFFER_SIZE];
1053  apr_uint32_t reduced;   /* used for 32 bit DIV */
1054  char* target;
1055
1056  /* Small numbers are by far the most common case.
1057   * Therefore, we use special code.
1058   */
1059  if (number < 100)
1060    {
1061      if (number < 10)
1062        {
1063          dest[0] = (char)('0' + number);
1064          dest[1] = 0;
1065          return 1;
1066        }
1067      else
1068        {
1069          COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]);
1070          dest[2] = 0;
1071          return 2;
1072        }
1073    }
1074
1075  /* Standard code. Write string in pairs of chars back-to-front */
1076  buffer[SVN_INT64_BUFFER_SIZE - 1] = 0;
1077  target = &buffer[SVN_INT64_BUFFER_SIZE - 3];
1078
1079  /* Loop may be executed 0 .. 2 times. */
1080  while (number >= 100000000)
1081    {
1082      /* Number is larger than 100^4, i.e. we can write 4x2 chars.
1083       * Also, use 32 bit DIVs as these are about twice as fast.
1084       */
1085      reduced = (apr_uint32_t)(number % 100000000);
1086      number /= 100000000;
1087
1088      COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]);
1089      reduced /= 100;
1090      COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]);
1091      reduced /= 100;
1092      COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]);
1093      reduced /= 100;
1094      COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]);
1095      target -= 8;
1096    }
1097
1098  /* Now, the number fits into 32 bits, but may still be larger than 99 */
1099  reduced = (apr_uint32_t)(number);
1100  while (reduced >= 100)
1101    {
1102      COPY_TWO_BYTES(target, decimal_table[reduced % 100]);
1103      reduced /= 100;
1104      target -= 2;
1105    }
1106
1107  /* The number is now smaller than 100 but larger than 1 */
1108  COPY_TWO_BYTES(target, decimal_table[reduced]);
1109
1110  /* Correction for uneven count of places. */
1111  if (reduced < 10)
1112    ++target;
1113
1114  /* Copy to target */
1115  memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target);
1116  return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1;
1117}
1118
1119apr_size_t
1120svn__i64toa(char * dest, apr_int64_t number)
1121{
1122  if (number >= 0)
1123    return svn__ui64toa(dest, (apr_uint64_t)number);
1124
1125  *dest = '-';
1126  return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1;
1127}
1128
1129static void
1130ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
1131{
1132  apr_size_t length = svn__ui64toa(buffer, number);
1133  apr_size_t i;
1134
1135  for (i = length; i > 3; i -= 3)
1136    {
1137      memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
1138      buffer[i-3] = seperator;
1139      length++;
1140    }
1141
1142  buffer[length] = 0;
1143}
1144
1145char *
1146svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool)
1147{
1148  char buffer[2 * SVN_INT64_BUFFER_SIZE];
1149  ui64toa_sep(number, seperator, buffer);
1150
1151  return apr_pstrdup(pool, buffer);
1152}
1153
1154char *
1155svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool)
1156{
1157  char buffer[2 * SVN_INT64_BUFFER_SIZE];
1158  if (number < 0)
1159    {
1160      buffer[0] = '-';
1161      ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]);
1162    }
1163  else
1164    ui64toa_sep((apr_uint64_t)(number), seperator, buffer);
1165
1166  return apr_pstrdup(pool, buffer);
1167}
1168
1169unsigned int
1170svn_cstring__similarity(const char *stra, const char *strb,
1171                        svn_membuf_t *buffer, apr_size_t *rlcs)
1172{
1173  svn_string_t stringa, stringb;
1174  stringa.data = stra;
1175  stringa.len = strlen(stra);
1176  stringb.data = strb;
1177  stringb.len = strlen(strb);
1178  return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
1179}
1180
1181unsigned int
1182svn_string__similarity(const svn_string_t *stringa,
1183                       const svn_string_t *stringb,
1184                       svn_membuf_t *buffer, apr_size_t *rlcs)
1185{
1186  const char *stra = stringa->data;
1187  const char *strb = stringb->data;
1188  const apr_size_t lena = stringa->len;
1189  const apr_size_t lenb = stringb->len;
1190  const apr_size_t total = lena + lenb;
1191  const char *enda = stra + lena;
1192  const char *endb = strb + lenb;
1193  apr_size_t lcs = 0;
1194
1195  /* Skip the common prefix ... */
1196  while (stra < enda && strb < endb && *stra == *strb)
1197    {
1198      ++stra; ++strb;
1199      ++lcs;
1200    }
1201
1202  /* ... and the common suffix */
1203  while (stra < enda && strb < endb)
1204    {
1205      --enda; --endb;
1206      if (*enda != *endb)
1207        {
1208          ++enda; ++endb;
1209          break;
1210        }
1211
1212      ++lcs;
1213    }
1214
1215  if (stra < enda && strb < endb)
1216    {
1217      const apr_size_t resta = enda - stra;
1218      const apr_size_t restb = endb - strb;
1219      const apr_size_t slots = (resta > restb ? restb : resta);
1220      apr_size_t *curr, *prev;
1221      const char *pstr;
1222
1223      /* The outer loop must iterate on the longer string. */
1224      if (resta < restb)
1225        {
1226          pstr = stra;
1227          stra = strb;
1228          strb = pstr;
1229
1230          pstr = enda;
1231          enda = endb;
1232          endb = pstr;
1233        }
1234
1235      /* Allocate two columns in the LCS matrix
1236         ### Optimize this to (slots + 2) instesd of 2 * (slots + 1) */
1237      svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t));
1238      svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t));
1239      prev = buffer->data;
1240      curr = prev + slots + 1;
1241
1242      /* Calculate LCS length of the remainder */
1243      for (pstr = stra; pstr < enda; ++pstr)
1244        {
1245          int i;
1246          for (i = 1; i <= slots; ++i)
1247            {
1248              if (*pstr == strb[i-1])
1249                curr[i] = prev[i-1] + 1;
1250              else
1251                curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]);
1252            }
1253
1254          /* Swap the buffers, making the previous one current */
1255          {
1256            apr_size_t *const temp = prev;
1257            prev = curr;
1258            curr = temp;
1259          }
1260        }
1261
1262      lcs += prev[slots];
1263    }
1264
1265  if (rlcs)
1266    *rlcs = lcs;
1267
1268  /* Return similarity ratio rounded to 4 significant digits */
1269  if (total)
1270    return(unsigned int)((2000 * lcs + total/2) / total);
1271  else
1272    return 1000;
1273}
1274