1/*
2 * dirent_uri.c:   a library to manipulate URIs and directory entries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28#include <ctype.h>
29
30#include <apr_uri.h>
31#include <apr_lib.h>
32
33#include "svn_private_config.h"
34#include "svn_string.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_ctype.h"
38
39#include "dirent_uri.h"
40#include "private/svn_fspath.h"
41
/* The canonical empty path.  Can this be changed?  Well, change the empty
   test below and the path library will work, not so sure about the fs/wc
   libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path, FALSE otherwise.  Only the first
   byte of S is inspected, so S must point to at least one readable byte. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."), FALSE
   otherwise.  Can this be changed?  Well, the path library will work, not
   so sure about the OS! */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')

/* Enable the DOS-style path handling (drive letters, UNC paths) used
   throughout this file.  This check must match the check on top of
   dirent_uri-tests.c and path-tests.c */
#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
#define SVN_USE_DOS_PATHS
#endif
60
/* Path type definition. Used only by internal functions, which take it
   as a parameter to select the type-specific handling of a path. */
typedef enum path_type_t {
  type_uri,      /* handled as a URI; may start with "scheme://" */
  type_dirent,   /* handled as a dirent; may have a root such as '/' */
  type_relpath   /* handled as a relpath; no root prefix is processed */
} path_type_t;
67
68
69/**** Forward declarations *****/
70
71static svn_boolean_t
72relpath_is_canonical(const char *relpath);
73
74
75/**** Internal implementation functions *****/
76
77/* Return an internal-style new path based on PATH, allocated in POOL.
78 *
79 * "Internal-style" means that separators are all '/'.
80 */
81static const char *
82internal_style(const char *path, apr_pool_t *pool)
83{
84#if '/' != SVN_PATH_LOCAL_SEPARATOR
85    {
86      char *p = apr_pstrdup(pool, path);
87      path = p;
88
89      /* Convert all local-style separators to the canonical ones. */
90      for (; *p != '\0'; ++p)
91        if (*p == SVN_PATH_LOCAL_SEPARATOR)
92          *p = '/';
93    }
94#endif
95
96  return path;
97}
98
/* Locale-independent replacement for tolower(), used while
   canonicalizing parts of dirents and URLs: map 'A'..'Z' to 'a'..'z'
   and return every other character unchanged. */
static char
canonicalize_to_lower(char c)
{
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
109
/* Locale-independent replacement for toupper(), used while
   canonicalizing parts of dirents and URLs: map 'a'..'z' to 'A'..'Z'
   and return every other character unchanged. */
static char
canonicalize_to_upper(char c)
{
  return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}
120
121/* Calculates the length of the dirent absolute or non absolute root in
122   DIRENT, return 0 if dirent is not rooted  */
123static apr_size_t
124dirent_root_length(const char *dirent, apr_size_t len)
125{
126#ifdef SVN_USE_DOS_PATHS
127  if (len >= 2 && dirent[1] == ':' &&
128      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
129       (dirent[0] >= 'a' && dirent[0] <= 'z')))
130    {
131      return (len > 2 && dirent[2] == '/') ? 3 : 2;
132    }
133
134  if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
135    {
136      apr_size_t i = 2;
137
138      while (i < len && dirent[i] != '/')
139        i++;
140
141      if (i == len)
142        return len; /* Cygwin drive alias, invalid path on WIN32 */
143
144      i++; /* Skip '/' */
145
146      while (i < len && dirent[i] != '/')
147        i++;
148
149      return i;
150    }
151#endif /* SVN_USE_DOS_PATHS */
152  if (len >= 1 && dirent[0] == '/')
153    return 1;
154
155  return 0;
156}
157
158
159/* Return the length of substring necessary to encompass the entire
160 * previous dirent segment in DIRENT, which should be a LEN byte string.
161 *
162 * A trailing slash will not be included in the returned length except
163 * in the case in which DIRENT is absolute and there are no more
164 * previous segments.
165 */
166static apr_size_t
167dirent_previous_segment(const char *dirent,
168                        apr_size_t len)
169{
170  if (len == 0)
171    return 0;
172
173  --len;
174  while (len > 0 && dirent[len] != '/'
175#ifdef SVN_USE_DOS_PATHS
176                 && (dirent[len] != ':' || len != 1)
177#endif /* SVN_USE_DOS_PATHS */
178        )
179    --len;
180
181  /* check if the remaining segment including trailing '/' is a root dirent */
182  if (dirent_root_length(dirent, len+1) == len + 1)
183    return len + 1;
184  else
185    return len;
186}
187
188/* Calculates the length occupied by the schema defined root of URI */
189static apr_size_t
190uri_schema_root_length(const char *uri, apr_size_t len)
191{
192  apr_size_t i;
193
194  for (i = 0; i < len; i++)
195    {
196      if (uri[i] == '/')
197        {
198          if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
199            {
200              /* We have an absolute uri */
201              if (i == 5 && strncmp("file", uri, 4) == 0)
202                return 7; /* file:// */
203              else
204                {
205                  for (i += 2; i < len; i++)
206                    if (uri[i] == '/')
207                      return i;
208
209                  return len; /* Only a hostname is found */
210                }
211            }
212          else
213            return 0;
214        }
215    }
216
217  return 0;
218}
219
220/* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
221   a non absolute root. (E.g. '/' or 'F:' on Windows) */
222static svn_boolean_t
223dirent_is_rooted(const char *dirent)
224{
225  if (! dirent)
226    return FALSE;
227
228  /* Root on all systems */
229  if (dirent[0] == '/')
230    return TRUE;
231
232  /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
233     where 'H' is any letter. */
234#ifdef SVN_USE_DOS_PATHS
235  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
236       (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
237      (dirent[1] == ':'))
238     return TRUE;
239#endif /* SVN_USE_DOS_PATHS */
240
241  return FALSE;
242}
243
244/* Return the length of substring necessary to encompass the entire
245 * previous relpath segment in RELPATH, which should be a LEN byte string.
246 *
247 * A trailing slash will not be included in the returned length.
248 */
249static apr_size_t
250relpath_previous_segment(const char *relpath,
251                         apr_size_t len)
252{
253  if (len == 0)
254    return 0;
255
256  --len;
257  while (len > 0 && relpath[len] != '/')
258    --len;
259
260  return len;
261}
262
263/* Return the length of substring necessary to encompass the entire
264 * previous uri segment in URI, which should be a LEN byte string.
265 *
266 * A trailing slash will not be included in the returned length except
267 * in the case in which URI is absolute and there are no more
268 * previous segments.
269 */
270static apr_size_t
271uri_previous_segment(const char *uri,
272                     apr_size_t len)
273{
274  apr_size_t root_length;
275  apr_size_t i = len;
276  if (len == 0)
277    return 0;
278
279  root_length = uri_schema_root_length(uri, len);
280
281  --i;
282  while (len > root_length && uri[i] != '/')
283    --i;
284
285  if (i == 0 && len > 1 && *uri == '/')
286    return 1;
287
288  return i;
289}
290
291/* Return the canonicalized version of PATH, of type TYPE, allocated in
292 * POOL.
293 */
294static const char *
295canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
296{
297  char *canon, *dst;
298  const char *src;
299  apr_size_t seglen;
300  apr_size_t schemelen = 0;
301  apr_size_t canon_segments = 0;
302  svn_boolean_t url = FALSE;
303  char *schema_data = NULL;
304
305  /* "" is already canonical, so just return it; note that later code
306     depends on path not being zero-length.  */
307  if (SVN_PATH_IS_EMPTY(path))
308    {
309      assert(type != type_uri);
310      return "";
311    }
312
313  dst = canon = apr_pcalloc(pool, strlen(path) + 1);
314
315  /* If this is supposed to be an URI, it should start with
316     "scheme://".  We'll copy the scheme, host name, etc. to DST and
317     set URL = TRUE. */
318  src = path;
319  if (type == type_uri)
320    {
321      assert(*src != '/');
322
323      while (*src && (*src != '/') && (*src != ':'))
324        src++;
325
326      if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
327        {
328          const char *seg;
329
330          url = TRUE;
331
332          /* Found a scheme, convert to lowercase and copy to dst. */
333          src = path;
334          while (*src != ':')
335            {
336              *(dst++) = canonicalize_to_lower((*src++));
337              schemelen++;
338            }
339          *(dst++) = ':';
340          *(dst++) = '/';
341          *(dst++) = '/';
342          src += 3;
343          schemelen += 3;
344
345          /* This might be the hostname */
346          seg = src;
347          while (*src && (*src != '/') && (*src != '@'))
348            src++;
349
350          if (*src == '@')
351            {
352              /* Copy the username & password. */
353              seglen = src - seg + 1;
354              memcpy(dst, seg, seglen);
355              dst += seglen;
356              src++;
357            }
358          else
359            src = seg;
360
361          /* Found a hostname, convert to lowercase and copy to dst. */
362          if (*src == '[')
363            {
364             *(dst++) = *(src++); /* Copy '[' */
365
366              while (*src == ':'
367                     || (*src >= '0' && (*src <= '9'))
368                     || (*src >= 'a' && (*src <= 'f'))
369                     || (*src >= 'A' && (*src <= 'F')))
370                {
371                  *(dst++) = canonicalize_to_lower((*src++));
372                }
373
374              if (*src == ']')
375                *(dst++) = *(src++); /* Copy ']' */
376            }
377          else
378            while (*src && (*src != '/') && (*src != ':'))
379              *(dst++) = canonicalize_to_lower((*src++));
380
381          if (*src == ':')
382            {
383              /* We probably have a port number: Is it a default portnumber
384                 which doesn't belong in a canonical url? */
385              if (src[1] == '8' && src[2] == '0'
386                  && (src[3]== '/'|| !src[3])
387                  && !strncmp(canon, "http:", 5))
388                {
389                  src += 3;
390                }
391              else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
392                       && (src[4]== '/'|| !src[4])
393                       && !strncmp(canon, "https:", 6))
394                {
395                  src += 4;
396                }
397              else if (src[1] == '3' && src[2] == '6'
398                       && src[3] == '9' && src[4] == '0'
399                       && (src[5]== '/'|| !src[5])
400                       && !strncmp(canon, "svn:", 4))
401                {
402                  src += 5;
403                }
404              else if (src[1] == '/' || !src[1])
405                {
406                  src += 1;
407                }
408
409              while (*src && (*src != '/'))
410                *(dst++) = canonicalize_to_lower((*src++));
411            }
412
413          /* Copy trailing slash, or null-terminator. */
414          *(dst) = *(src);
415
416          /* Move src and dst forward only if we are not
417           * at null-terminator yet. */
418          if (*src)
419            {
420              src++;
421              dst++;
422              schema_data = dst;
423            }
424
425          canon_segments = 1;
426        }
427    }
428
429  /* Copy to DST any separator or drive letter that must come before the
430     first regular path segment. */
431  if (! url && type != type_relpath)
432    {
433      src = path;
434      /* If this is an absolute path, then just copy over the initial
435         separator character. */
436      if (*src == '/')
437        {
438          *(dst++) = *(src++);
439
440#ifdef SVN_USE_DOS_PATHS
441          /* On Windows permit two leading separator characters which means an
442           * UNC path. */
443          if ((type == type_dirent) && *src == '/')
444            *(dst++) = *(src++);
445#endif /* SVN_USE_DOS_PATHS */
446        }
447#ifdef SVN_USE_DOS_PATHS
448      /* On Windows the first segment can be a drive letter, which we normalize
449         to upper case. */
450      else if (type == type_dirent &&
451               ((*src >= 'a' && *src <= 'z') ||
452                (*src >= 'A' && *src <= 'Z')) &&
453               (src[1] == ':'))
454        {
455          *(dst++) = canonicalize_to_upper(*(src++));
456          /* Leave the ':' to be processed as (or as part of) a path segment
457             by the following code block, so we need not care whether it has
458             a slash after it. */
459        }
460#endif /* SVN_USE_DOS_PATHS */
461    }
462
463  while (*src)
464    {
465      /* Parse each segment, finding the closing '/' (which might look
466         like '%2F' for URIs).  */
467      const char *next = src;
468      apr_size_t slash_len = 0;
469
470      while (*next
471             && (next[0] != '/')
472             && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
473                    canonicalize_to_upper(next[2]) == 'F')))
474        {
475          ++next;
476        }
477
478      /* Record how long our "slash" is. */
479      if (next[0] == '/')
480        slash_len = 1;
481      else if (type == type_uri && next[0] == '%')
482        slash_len = 3;
483
484      seglen = next - src;
485
486      if (seglen == 0
487          || (seglen == 1 && src[0] == '.')
488          || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
489              && canonicalize_to_upper(src[2]) == 'E'))
490        {
491          /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
492             is equivalent to '.').  */
493        }
494#ifdef SVN_USE_DOS_PATHS
495      /* If this is the first path segment of a file:// URI and it contains a
496         windows drive letter, convert the drive letter to upper case. */
497      else if (url && canon_segments == 1 && seglen == 2 &&
498               (strncmp(canon, "file:", 5) == 0) &&
499               src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
500        {
501          *(dst++) = canonicalize_to_upper(src[0]);
502          *(dst++) = ':';
503          if (*next)
504            *(dst++) = *next;
505          canon_segments++;
506        }
507#endif /* SVN_USE_DOS_PATHS */
508      else
509        {
510          /* An actual segment, append it to the destination path */
511          memcpy(dst, src, seglen);
512          dst += seglen;
513          if (slash_len)
514            *(dst++) = '/';
515          canon_segments++;
516        }
517
518      /* Skip over trailing slash to the next segment. */
519      src = next + slash_len;
520    }
521
522  /* Remove the trailing slash if there was at least one
523   * canonical segment and the last segment ends with a slash.
524   *
525   * But keep in mind that, for URLs, the scheme counts as a
526   * canonical segment -- so if path is ONLY a scheme (such
527   * as "https://") we should NOT remove the trailing slash. */
528  if ((canon_segments > 0 && *(dst - 1) == '/')
529      && ! (url && path[schemelen] == '\0'))
530    {
531      dst --;
532    }
533
534  *dst = '\0';
535
536#ifdef SVN_USE_DOS_PATHS
537  /* Skip leading double slashes when there are less than 2
538   * canon segments. UNC paths *MUST* have two segments. */
539  if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
540    {
541      if (canon_segments < 2)
542        return canon + 1;
543      else
544        {
545          /* Now we're sure this is a valid UNC path, convert the server name
546             (the first path segment) to lowercase as Windows treats it as case
547             insensitive.
548             Note: normally the share name is treated as case insensitive too,
549             but it seems to be possible to configure Samba to treat those as
550             case sensitive, so better leave that alone. */
551          for (dst = canon + 2; *dst && *dst != '/'; dst++)
552            *dst = canonicalize_to_lower(*dst);
553        }
554    }
555#endif /* SVN_USE_DOS_PATHS */
556
557  /* Check the normalization of characters in a uri */
558  if (schema_data)
559    {
560      int need_extra = 0;
561      src = schema_data;
562
563      while (*src)
564        {
565          switch (*src)
566            {
567              case '/':
568                break;
569              case '%':
570                if (!svn_ctype_isxdigit(*(src+1)) ||
571                    !svn_ctype_isxdigit(*(src+2)))
572                  need_extra += 2;
573                else
574                  src += 2;
575                break;
576              default:
577                if (!svn_uri__char_validity[(unsigned char)*src])
578                  need_extra += 2;
579                break;
580            }
581          src++;
582        }
583
584      if (need_extra > 0)
585        {
586          apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);
587
588          dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
589          memcpy(dst, canon, pre_schema_size);
590          canon = dst;
591
592          dst += pre_schema_size;
593        }
594      else
595        dst = schema_data;
596
597      src = schema_data;
598
599      while (*src)
600        {
601          switch (*src)
602            {
603              case '/':
604                *(dst++) = '/';
605                break;
606              case '%':
607                if (!svn_ctype_isxdigit(*(src+1)) ||
608                    !svn_ctype_isxdigit(*(src+2)))
609                  {
610                    *(dst++) = '%';
611                    *(dst++) = '2';
612                    *(dst++) = '5';
613                  }
614                else
615                  {
616                    char digitz[3];
617                    int val;
618
619                    digitz[0] = *(++src);
620                    digitz[1] = *(++src);
621                    digitz[2] = 0;
622
623                    val = (int)strtol(digitz, NULL, 16);
624
625                    if (svn_uri__char_validity[(unsigned char)val])
626                      *(dst++) = (char)val;
627                    else
628                      {
629                        *(dst++) = '%';
630                        *(dst++) = canonicalize_to_upper(digitz[0]);
631                        *(dst++) = canonicalize_to_upper(digitz[1]);
632                      }
633                  }
634                break;
635              default:
636                if (!svn_uri__char_validity[(unsigned char)*src])
637                  {
638                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
639                    dst += 3;
640                  }
641                else
642                  *(dst++) = *src;
643                break;
644            }
645          src++;
646        }
647      *dst = '\0';
648    }
649
650  return canon;
651}
652
/* Return the string length of the longest common ancestor of PATH1 and PATH2.
 * Pass type_uri for TYPES if PATH1 and PATH2 are URIs, and type_dirent if
 * PATH1 and PATH2 are regular paths.
 *
 * If the two paths do not share a common ancestor, or if either of them
 * is empty, return 0.
 *
 * The returned value is a length only; POOL is not used by this function.
 */
static apr_size_t
get_longest_ancestor_length(path_type_t types,
                            const char *path1,
                            const char *path2,
                            apr_pool_t *pool)
{
  apr_size_t path1_len, path2_len;
  apr_size_t i = 0;
  apr_size_t last_dirsep = 0;
#ifdef SVN_USE_DOS_PATHS
  svn_boolean_t unc = FALSE;
#endif

  path1_len = strlen(path1);
  path2_len = strlen(path2);

  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
    return 0;

  /* Advance I over the common prefix of both paths. */
  while (path1[i] == path2[i])
    {
      /* Keep track of the last directory separator we hit. */
      if (path1[i] == '/')
        last_dirsep = i;

      i++;

      /* If we get to the end of either path, break out. */
      if ((i == path1_len) || (i == path2_len))
        break;
    }

  /* two special cases:
     1. '/' is the longest common ancestor of '/' and '/foo' */
  if (i == 1 && path1[0] == '/' && path2[0] == '/')
    return 1;
  /* 2. '' is the longest common ancestor of any non-matching
   * strings 'foo' and 'bar' */
  if (types == type_dirent && i == 0)
    return 0;

  /* Handle some windows specific cases */
#ifdef SVN_USE_DOS_PATHS
  if (types == type_dirent)
    {
      /* don't count the '//' from UNC paths */
      if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
        {
          last_dirsep = 0;
          unc = TRUE;
        }

      /* X:/ and X:/foo */
      if (i == 3 && path1[2] == '/' && path1[1] == ':')
        return i;

      /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
       * Note that this assertion triggers only if the code above has
       * been broken. The code below relies on this assertion, because
       * it uses [i - 1] as index. */
      assert(i > 0);

      /* X: and X:/ */
      if ((path1[i - 1] == ':' && path2[i] == '/') ||
          (path2[i - 1] == ':' && path1[i] == '/'))
          return 0;
      /* X: and X:foo */
      if (path1[i - 1] == ':' || path2[i - 1] == ':')
          return i;
    }
#endif /* SVN_USE_DOS_PATHS */

  /* last_dirsep is now the offset of the last directory separator we
     crossed before reaching a non-matching byte.  i is the offset of
     that non-matching byte, and is guaranteed to be <= the length of
     whichever path is shorter.
     If one of the paths is the common part return that. */
  if (((i == path1_len) && (path2[i] == '/'))
           || ((i == path2_len) && (path1[i] == '/'))
           || ((i == path1_len) && (i == path2_len)))
    return i;
  else
    {
      /* Nothing in common but the root folder '/' or 'X:/' for Windows
         dirents. */
#ifdef SVN_USE_DOS_PATHS
      /* The root checks below are skipped for UNC paths. */
      if (! unc)
        {
          /* X:/foo and X:/bar returns X:/ */
          if ((types == type_dirent) &&
              last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
                               && path2[1] == ':' && path2[2] == '/')
            return 3;
#endif /* SVN_USE_DOS_PATHS */
          /* /foo and /bar returns / */
          if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
            return 1;
#ifdef SVN_USE_DOS_PATHS
        }
#endif
    }

  /* Otherwise the common ancestor ends just before the last separator
     that both paths share. */
  return last_dirsep;
}
764
765/* Determine whether PATH2 is a child of PATH1.
766 *
767 * PATH2 is a child of PATH1 if
768 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
769 * or
770 * 2) PATH2 is has n components, PATH1 has x < n components,
771 *    and PATH1 matches PATH2 in all its x components.
772 *    Components are separated by a slash, '/'.
773 *
774 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
775 * PATH1 and PATH2 are regular paths.
776 *
777 * If PATH2 is not a child of PATH1, return NULL.
778 *
779 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
780 * of the child part of PATH2 in POOL and return a pointer to the
781 * newly allocated child part.
782 *
783 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
784 * pointing to the child part of PATH2.
785 * */
786static const char *
787is_child(path_type_t type, const char *path1, const char *path2,
788         apr_pool_t *pool)
789{
790  apr_size_t i;
791
792  /* Allow "" and "foo" or "H:foo" to be parent/child */
793  if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
794    {
795      if (SVN_PATH_IS_EMPTY(path2))            /* "" not a child    */
796        return NULL;
797
798      /* check if this is an absolute path */
799      if ((type == type_uri) ||
800          (type == type_dirent && dirent_is_rooted(path2)))
801        return NULL;
802      else
803        /* everything else is child */
804        return pool ? apr_pstrdup(pool, path2) : path2;
805    }
806
807  /* Reach the end of at least one of the paths.  How should we handle
808     things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
809     appear to arise in the current Subversion code, it's not clear to me
810     if they should be parent/child or not. */
811  /* Hmmm... aren't paths assumed to be canonical in this function?
812   * How can "foo///bar" even happen if the paths are canonical? */
813  for (i = 0; path1[i] && path2[i]; i++)
814    if (path1[i] != path2[i])
815      return NULL;
816
817  /* FIXME: This comment does not really match
818   * the checks made in the code it refers to: */
819  /* There are two cases that are parent/child
820          ...      path1[i] == '\0'
821          .../foo  path2[i] == '/'
822      or
823          /        path1[i] == '\0'
824          /foo     path2[i] != '/'
825
826     Other root paths (like X:/) fall under the former case:
827          X:/        path1[i] == '\0'
828          X:/foo     path2[i] != '/'
829
830     Check for '//' to avoid matching '/' and '//srv'.
831  */
832  if (path1[i] == '\0' && path2[i])
833    {
834      if (path1[i - 1] == '/'
835#ifdef SVN_USE_DOS_PATHS
836          || ((type == type_dirent) && path1[i - 1] == ':')
837#endif
838           )
839        {
840          if (path2[i] == '/')
841            /* .../
842             * ..../
843             *     i   */
844            return NULL;
845          else
846            /* .../
847             * .../foo
848             *     i    */
849            return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
850        }
851      else if (path2[i] == '/')
852        {
853          if (path2[i + 1])
854            /* ...
855             * .../foo
856             *    i    */
857            return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
858          else
859            /* ...
860             * .../
861             *    i    */
862            return NULL;
863        }
864    }
865
866  /* Otherwise, path2 isn't a child. */
867  return NULL;
868}
869
870
871/**** Public API functions ****/
872
873const char *
874svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
875{
876  return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
877}
878
879const char *
880svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
881{
882  /* Internally, Subversion represents the current directory with the
883     empty string.  But users like to see "." . */
884  if (SVN_PATH_IS_EMPTY(dirent))
885    return ".";
886
887#if '/' != SVN_PATH_LOCAL_SEPARATOR
888    {
889      char *p = apr_pstrdup(pool, dirent);
890      dirent = p;
891
892      /* Convert all canonical separators to the local-style ones. */
893      for (; *p != '\0'; ++p)
894        if (*p == '/')
895          *p = SVN_PATH_LOCAL_SEPARATOR;
896    }
897#endif
898
899  return dirent;
900}
901
902const char *
903svn_relpath__internal_style(const char *relpath,
904                            apr_pool_t *pool)
905{
906  return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
907}
908
909
910/* We decided against using apr_filepath_root here because of the negative
911   performance impact (creating a pool and converting strings ). */
912svn_boolean_t
913svn_dirent_is_root(const char *dirent, apr_size_t len)
914{
915#ifdef SVN_USE_DOS_PATHS
916  /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
917     are also root directories */
918  if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
919      (dirent[1] == ':') &&
920      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
921       (dirent[0] >= 'a' && dirent[0] <= 'z')))
922    return TRUE;
923
924  /* On Windows and Cygwin //server/share is a root directory,
925     and on Cygwin //drive is a drive alias */
926  if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
927      && dirent[len - 1] != '/')
928    {
929      int segments = 0;
930      apr_size_t i;
931      for (i = len; i >= 2; i--)
932        {
933          if (dirent[i] == '/')
934            {
935              segments ++;
936              if (segments > 1)
937                return FALSE;
938            }
939        }
940#ifdef __CYGWIN__
941      return (segments <= 1);
942#else
943      return (segments == 1); /* //drive is invalid on plain Windows */
944#endif
945    }
946#endif
947
948  /* directory is root if it's equal to '/' */
949  if (len == 1 && dirent[0] == '/')
950    return TRUE;
951
952  return FALSE;
953}
954
955svn_boolean_t
956svn_uri_is_root(const char *uri, apr_size_t len)
957{
958  assert(svn_uri_is_canonical(uri, NULL));
959  return (len == uri_schema_root_length(uri, len));
960}
961
962char *svn_dirent_join(const char *base,
963                      const char *component,
964                      apr_pool_t *pool)
965{
966  apr_size_t blen = strlen(base);
967  apr_size_t clen = strlen(component);
968  char *dirent;
969  int add_separator;
970
971  assert(svn_dirent_is_canonical(base, pool));
972  assert(svn_dirent_is_canonical(component, pool));
973
974  /* If the component is absolute, then return it.  */
975  if (svn_dirent_is_absolute(component))
976    return apr_pmemdup(pool, component, clen + 1);
977
978  /* If either is empty return the other */
979  if (SVN_PATH_IS_EMPTY(base))
980    return apr_pmemdup(pool, component, clen + 1);
981  if (SVN_PATH_IS_EMPTY(component))
982    return apr_pmemdup(pool, base, blen + 1);
983
984#ifdef SVN_USE_DOS_PATHS
985  if (component[0] == '/')
986    {
987      /* '/' is drive relative on Windows, not absolute like on Posix */
988      if (dirent_is_rooted(base))
989        {
990          /* Join component without '/' to root-of(base) */
991          blen = dirent_root_length(base, blen);
992          component++;
993          clen--;
994
995          if (blen == 2 && base[1] == ':') /* "C:" case */
996            {
997              char *root = apr_pmemdup(pool, base, 3);
998              root[2] = '/'; /* We don't need the final '\0' */
999
1000              base = root;
1001              blen = 3;
1002            }
1003
1004          if (clen == 0)
1005            return apr_pstrndup(pool, base, blen);
1006        }
1007      else
1008        return apr_pmemdup(pool, component, clen + 1);
1009    }
1010  else if (dirent_is_rooted(component))
1011    return apr_pmemdup(pool, component, clen + 1);
1012#endif /* SVN_USE_DOS_PATHS */
1013
1014  /* if last character of base is already a separator, don't add a '/' */
1015  add_separator = 1;
1016  if (base[blen - 1] == '/'
1017#ifdef SVN_USE_DOS_PATHS
1018       || base[blen - 1] == ':'
1019#endif
1020        )
1021          add_separator = 0;
1022
1023  /* Construct the new, combined dirent. */
1024  dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1025  memcpy(dirent, base, blen);
1026  if (add_separator)
1027    dirent[blen] = '/';
1028  memcpy(dirent + blen + add_separator, component, clen + 1);
1029
1030  return dirent;
1031}
1032
1033char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1034{
1035#define MAX_SAVED_LENGTHS 10
1036  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1037  apr_size_t total_len;
1038  int nargs;
1039  va_list va;
1040  const char *s;
1041  apr_size_t len;
1042  char *dirent;
1043  char *p;
1044  int add_separator;
1045  int base_arg = 0;
1046
1047  total_len = strlen(base);
1048
1049  assert(svn_dirent_is_canonical(base, pool));
1050
1051  /* if last character of base is already a separator, don't add a '/' */
1052  add_separator = 1;
1053  if (total_len == 0
1054       || base[total_len - 1] == '/'
1055#ifdef SVN_USE_DOS_PATHS
1056       || base[total_len - 1] == ':'
1057#endif
1058        )
1059          add_separator = 0;
1060
1061  saved_lengths[0] = total_len;
1062
1063  /* Compute the length of the resulting string. */
1064
1065  nargs = 0;
1066  va_start(va, base);
1067  while ((s = va_arg(va, const char *)) != NULL)
1068    {
1069      len = strlen(s);
1070
1071      assert(svn_dirent_is_canonical(s, pool));
1072
1073      if (SVN_PATH_IS_EMPTY(s))
1074        continue;
1075
1076      if (nargs++ < MAX_SAVED_LENGTHS)
1077        saved_lengths[nargs] = len;
1078
1079      if (dirent_is_rooted(s))
1080        {
1081          total_len = len;
1082          base_arg = nargs;
1083
1084#ifdef SVN_USE_DOS_PATHS
1085          if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1086            {
1087              /* Set new base and skip the current argument */
1088              base = s = svn_dirent_join(base, s, pool);
1089              base_arg++;
1090              saved_lengths[0] = total_len = len = strlen(s);
1091            }
1092          else
1093#endif /* SVN_USE_DOS_PATHS */
1094            {
1095              base = ""; /* Don't add base */
1096              saved_lengths[0] = 0;
1097            }
1098
1099          add_separator = 1;
1100          if (s[len - 1] == '/'
1101#ifdef SVN_USE_DOS_PATHS
1102             || s[len - 1] == ':'
1103#endif
1104              )
1105             add_separator = 0;
1106        }
1107      else if (nargs <= base_arg + 1)
1108        {
1109          total_len += add_separator + len;
1110        }
1111      else
1112        {
1113          total_len += 1 + len;
1114        }
1115    }
1116  va_end(va);
1117
1118  /* base == "/" and no further components. just return that. */
1119  if (add_separator == 0 && total_len == 1)
1120    return apr_pmemdup(pool, "/", 2);
1121
1122  /* we got the total size. allocate it, with room for a NULL character. */
1123  dirent = p = apr_palloc(pool, total_len + 1);
1124
1125  /* if we aren't supposed to skip forward to an absolute component, and if
1126     this is not an empty base that we are skipping, then copy the base
1127     into the output. */
1128  if (! SVN_PATH_IS_EMPTY(base))
1129    {
1130      memcpy(p, base, len = saved_lengths[0]);
1131      p += len;
1132    }
1133
1134  nargs = 0;
1135  va_start(va, base);
1136  while ((s = va_arg(va, const char *)) != NULL)
1137    {
1138      if (SVN_PATH_IS_EMPTY(s))
1139        continue;
1140
1141      if (++nargs < base_arg)
1142        continue;
1143
1144      if (nargs < MAX_SAVED_LENGTHS)
1145        len = saved_lengths[nargs];
1146      else
1147        len = strlen(s);
1148
1149      /* insert a separator if we aren't copying in the first component
1150         (which can happen when base_arg is set). also, don't put in a slash
1151         if the prior character is a slash (occurs when prior component
1152         is "/"). */
1153      if (p != dirent &&
1154          ( ! (nargs - 1 <= base_arg) || add_separator))
1155        *p++ = '/';
1156
1157      /* copy the new component and advance the pointer */
1158      memcpy(p, s, len);
1159      p += len;
1160    }
1161  va_end(va);
1162
1163  *p = '\0';
1164  assert((apr_size_t)(p - dirent) == total_len);
1165
1166  return dirent;
1167}
1168
1169char *
1170svn_relpath_join(const char *base,
1171                 const char *component,
1172                 apr_pool_t *pool)
1173{
1174  apr_size_t blen = strlen(base);
1175  apr_size_t clen = strlen(component);
1176  char *path;
1177
1178  assert(relpath_is_canonical(base));
1179  assert(relpath_is_canonical(component));
1180
1181  /* If either is empty return the other */
1182  if (blen == 0)
1183    return apr_pmemdup(pool, component, clen + 1);
1184  if (clen == 0)
1185    return apr_pmemdup(pool, base, blen + 1);
1186
1187  path = apr_palloc(pool, blen + 1 + clen + 1);
1188  memcpy(path, base, blen);
1189  path[blen] = '/';
1190  memcpy(path + blen + 1, component, clen + 1);
1191
1192  return path;
1193}
1194
1195char *
1196svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1197{
1198  apr_size_t len = strlen(dirent);
1199
1200  assert(svn_dirent_is_canonical(dirent, pool));
1201
1202  if (len == dirent_root_length(dirent, len))
1203    return apr_pstrmemdup(pool, dirent, len);
1204  else
1205    return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1206}
1207
1208const char *
1209svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1210{
1211  apr_size_t len = strlen(dirent);
1212  apr_size_t start;
1213
1214  assert(!pool || svn_dirent_is_canonical(dirent, pool));
1215
1216  if (svn_dirent_is_root(dirent, len))
1217    return "";
1218  else
1219    {
1220      start = len;
1221      while (start > 0 && dirent[start - 1] != '/'
1222#ifdef SVN_USE_DOS_PATHS
1223             && dirent[start - 1] != ':'
1224#endif
1225            )
1226        --start;
1227    }
1228
1229  if (pool)
1230    return apr_pstrmemdup(pool, dirent + start, len - start);
1231  else
1232    return dirent + start;
1233}
1234
1235void
1236svn_dirent_split(const char **dirpath,
1237                 const char **base_name,
1238                 const char *dirent,
1239                 apr_pool_t *pool)
1240{
1241  assert(dirpath != base_name);
1242
1243  if (dirpath)
1244    *dirpath = svn_dirent_dirname(dirent, pool);
1245
1246  if (base_name)
1247    *base_name = svn_dirent_basename(dirent, pool);
1248}
1249
1250char *
1251svn_relpath_dirname(const char *relpath,
1252                    apr_pool_t *pool)
1253{
1254  apr_size_t len = strlen(relpath);
1255
1256  assert(relpath_is_canonical(relpath));
1257
1258  return apr_pstrmemdup(pool, relpath,
1259                        relpath_previous_segment(relpath, len));
1260}
1261
1262const char *
1263svn_relpath_basename(const char *relpath,
1264                     apr_pool_t *pool)
1265{
1266  apr_size_t len = strlen(relpath);
1267  apr_size_t start;
1268
1269  assert(relpath_is_canonical(relpath));
1270
1271  start = len;
1272  while (start > 0 && relpath[start - 1] != '/')
1273    --start;
1274
1275  if (pool)
1276    return apr_pstrmemdup(pool, relpath + start, len - start);
1277  else
1278    return relpath + start;
1279}
1280
1281void
1282svn_relpath_split(const char **dirpath,
1283                  const char **base_name,
1284                  const char *relpath,
1285                  apr_pool_t *pool)
1286{
1287  assert(dirpath != base_name);
1288
1289  if (dirpath)
1290    *dirpath = svn_relpath_dirname(relpath, pool);
1291
1292  if (base_name)
1293    *base_name = svn_relpath_basename(relpath, pool);
1294}
1295
1296char *
1297svn_uri_dirname(const char *uri, apr_pool_t *pool)
1298{
1299  apr_size_t len = strlen(uri);
1300
1301  assert(svn_uri_is_canonical(uri, pool));
1302
1303  if (svn_uri_is_root(uri, len))
1304    return apr_pstrmemdup(pool, uri, len);
1305  else
1306    return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1307}
1308
1309const char *
1310svn_uri_basename(const char *uri, apr_pool_t *pool)
1311{
1312  apr_size_t len = strlen(uri);
1313  apr_size_t start;
1314
1315  assert(svn_uri_is_canonical(uri, NULL));
1316
1317  if (svn_uri_is_root(uri, len))
1318    return "";
1319
1320  start = len;
1321  while (start > 0 && uri[start - 1] != '/')
1322    --start;
1323
1324  return svn_path_uri_decode(uri + start, pool);
1325}
1326
1327void
1328svn_uri_split(const char **dirpath,
1329              const char **base_name,
1330              const char *uri,
1331              apr_pool_t *pool)
1332{
1333  assert(dirpath != base_name);
1334
1335  if (dirpath)
1336    *dirpath = svn_uri_dirname(uri, pool);
1337
1338  if (base_name)
1339    *base_name = svn_uri_basename(uri, pool);
1340}
1341
1342char *
1343svn_dirent_get_longest_ancestor(const char *dirent1,
1344                                const char *dirent2,
1345                                apr_pool_t *pool)
1346{
1347  return apr_pstrndup(pool, dirent1,
1348                      get_longest_ancestor_length(type_dirent, dirent1,
1349                                                  dirent2, pool));
1350}
1351
1352char *
1353svn_relpath_get_longest_ancestor(const char *relpath1,
1354                                 const char *relpath2,
1355                                 apr_pool_t *pool)
1356{
1357  assert(relpath_is_canonical(relpath1));
1358  assert(relpath_is_canonical(relpath2));
1359
1360  return apr_pstrndup(pool, relpath1,
1361                      get_longest_ancestor_length(type_relpath, relpath1,
1362                                                  relpath2, pool));
1363}
1364
1365char *
1366svn_uri_get_longest_ancestor(const char *uri1,
1367                             const char *uri2,
1368                             apr_pool_t *pool)
1369{
1370  apr_size_t uri_ancestor_len;
1371  apr_size_t i = 0;
1372
1373  assert(svn_uri_is_canonical(uri1, NULL));
1374  assert(svn_uri_is_canonical(uri2, NULL));
1375
1376  /* Find ':' */
1377  while (1)
1378    {
1379      /* No shared protocol => no common prefix */
1380      if (uri1[i] != uri2[i])
1381        return apr_pmemdup(pool, SVN_EMPTY_PATH,
1382                           sizeof(SVN_EMPTY_PATH));
1383
1384      if (uri1[i] == ':')
1385        break;
1386
1387      /* They're both URLs, so EOS can't come before ':' */
1388      assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1389
1390      i++;
1391    }
1392
1393  i += 3;  /* Advance past '://' */
1394
1395  uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1396                                                 uri2 + i, pool);
1397
1398  if (uri_ancestor_len == 0 ||
1399      (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1400    return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1401  else
1402    return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1403}
1404
1405const char *
1406svn_dirent_is_child(const char *parent_dirent,
1407                    const char *child_dirent,
1408                    apr_pool_t *pool)
1409{
1410  return is_child(type_dirent, parent_dirent, child_dirent, pool);
1411}
1412
1413const char *
1414svn_dirent_skip_ancestor(const char *parent_dirent,
1415                         const char *child_dirent)
1416{
1417  apr_size_t len = strlen(parent_dirent);
1418  apr_size_t root_len;
1419
1420  if (0 != strncmp(parent_dirent, child_dirent, len))
1421    return NULL; /* parent_dirent is no ancestor of child_dirent */
1422
1423  if (child_dirent[len] == 0)
1424    return ""; /* parent_dirent == child_dirent */
1425
1426  /* Child == parent + more-characters */
1427
1428  root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1429  if (root_len > len)
1430    /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1431    return NULL;
1432
1433  /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1434   * It must be one of the following forms.
1435   *
1436   * rlen parent    child       bad?  rlen=len? c[len]=/?
1437   *  0   ""        "foo"               *
1438   *  0   "b"       "bad"         !
1439   *  0   "b"       "b/foo"                       *
1440   *  1   "/"       "/foo"              *
1441   *  1   "/b"      "/bad"        !
1442   *  1   "/b"      "/b/foo"                      *
1443   *  2   "a:"      "a:foo"             *
1444   *  2   "a:b"     "a:bad"       !
1445   *  2   "a:b"     "a:b/foo"                     *
1446   *  3   "a:/"     "a:/foo"            *
1447   *  3   "a:/b"    "a:/bad"      !
1448   *  3   "a:/b"    "a:/b/foo"                    *
1449   *  5   "//s/s"   "//s/s/foo"         *         *
1450   *  5   "//s/s/b" "//s/s/bad"   !
1451   *  5   "//s/s/b" "//s/s/b/foo"                 *
1452   */
1453
1454  if (child_dirent[len] == '/')
1455    /* "parent|child" is one of:
1456     * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1457    return child_dirent + len + 1;
1458
1459  if (root_len == len)
1460    /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1461    return child_dirent + len;
1462
1463  return NULL;
1464}
1465
1466const char *
1467svn_relpath_skip_ancestor(const char *parent_relpath,
1468                          const char *child_relpath)
1469{
1470  apr_size_t len = strlen(parent_relpath);
1471
1472  assert(relpath_is_canonical(parent_relpath));
1473  assert(relpath_is_canonical(child_relpath));
1474
1475  if (len == 0)
1476    return child_relpath;
1477
1478  if (0 != strncmp(parent_relpath, child_relpath, len))
1479    return NULL; /* parent_relpath is no ancestor of child_relpath */
1480
1481  if (child_relpath[len] == 0)
1482    return ""; /* parent_relpath == child_relpath */
1483
1484  if (child_relpath[len] == '/')
1485    return child_relpath + len + 1;
1486
1487  return NULL;
1488}
1489
1490
1491/* */
1492static const char *
1493uri_skip_ancestor(const char *parent_uri,
1494                  const char *child_uri)
1495{
1496  apr_size_t len = strlen(parent_uri);
1497
1498  assert(svn_uri_is_canonical(parent_uri, NULL));
1499  assert(svn_uri_is_canonical(child_uri, NULL));
1500
1501  if (0 != strncmp(parent_uri, child_uri, len))
1502    return NULL; /* parent_uri is no ancestor of child_uri */
1503
1504  if (child_uri[len] == 0)
1505    return ""; /* parent_uri == child_uri */
1506
1507  if (child_uri[len] == '/')
1508    return child_uri + len + 1;
1509
1510  return NULL;
1511}
1512
1513const char *
1514svn_uri_skip_ancestor(const char *parent_uri,
1515                      const char *child_uri,
1516                      apr_pool_t *result_pool)
1517{
1518  const char *result = uri_skip_ancestor(parent_uri, child_uri);
1519
1520  return result ? svn_path_uri_decode(result, result_pool) : NULL;
1521}
1522
1523svn_boolean_t
1524svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1525{
1526  return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1527}
1528
1529svn_boolean_t
1530svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1531{
1532  return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1533}
1534
1535
1536svn_boolean_t
1537svn_dirent_is_absolute(const char *dirent)
1538{
1539  if (! dirent)
1540    return FALSE;
1541
1542  /* dirent is absolute if it starts with '/' on non-Windows platforms
1543     or with '//' on Windows platforms */
1544  if (dirent[0] == '/'
1545#ifdef SVN_USE_DOS_PATHS
1546      && dirent[1] == '/' /* Single '/' depends on current drive */
1547#endif
1548      )
1549    return TRUE;
1550
1551  /* On Windows, dirent is also absolute when it starts with 'H:/'
1552     where 'H' is any letter. */
1553#ifdef SVN_USE_DOS_PATHS
1554  if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1555      (dirent[1] == ':') && (dirent[2] == '/'))
1556     return TRUE;
1557#endif /* SVN_USE_DOS_PATHS */
1558
1559  return FALSE;
1560}
1561
1562svn_error_t *
1563svn_dirent_get_absolute(const char **pabsolute,
1564                        const char *relative,
1565                        apr_pool_t *pool)
1566{
1567  char *buffer;
1568  apr_status_t apr_err;
1569  const char *path_apr;
1570
1571  SVN_ERR_ASSERT(! svn_path_is_url(relative));
1572
1573  /* Merge the current working directory with the relative dirent. */
1574  SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1575
1576  apr_err = apr_filepath_merge(&buffer, NULL,
1577                               path_apr,
1578                               APR_FILEPATH_NOTRELATIVE,
1579                               pool);
1580  if (apr_err)
1581    {
1582      /* In some cases when the passed path or its ancestor(s) do not exist
1583         or no longer exist apr returns an error.
1584
1585         In many of these cases we would like to return a path anyway, when the
1586         passed path was already a safe absolute path. So check for that now to
1587         avoid an error.
1588
1589         svn_dirent_is_absolute() doesn't perform the necessary checks to see
1590         if the path doesn't need post processing to be in the canonical absolute
1591         format.
1592         */
1593
1594      if (svn_dirent_is_absolute(relative)
1595          && svn_dirent_is_canonical(relative, pool)
1596          && !svn_path_is_backpath_present(relative))
1597        {
1598          *pabsolute = apr_pstrdup(pool, relative);
1599          return SVN_NO_ERROR;
1600        }
1601
1602      return svn_error_createf(SVN_ERR_BAD_FILENAME,
1603                               svn_error_create(apr_err, NULL, NULL),
1604                               _("Couldn't determine absolute path of '%s'"),
1605                               svn_dirent_local_style(relative, pool));
1606    }
1607
1608  SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1609  *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1610  return SVN_NO_ERROR;
1611}
1612
1613const char *
1614svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1615{
1616  return canonicalize(type_uri, uri, pool);
1617}
1618
1619const char *
1620svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1621{
1622  return canonicalize(type_relpath, relpath, pool);
1623}
1624
1625const char *
1626svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1627{
1628  const char *dst = canonicalize(type_dirent, dirent, pool);
1629
1630#ifdef SVN_USE_DOS_PATHS
1631  /* Handle a specific case on Windows where path == "X:/". Here we have to
1632     append the final '/', as svn_path_canonicalize will chop this of. */
1633  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1634        (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1635        dirent[1] == ':' && dirent[2] == '/' &&
1636        dst[3] == '\0')
1637    {
1638      char *dst_slash = apr_pcalloc(pool, 4);
1639      dst_slash[0] = canonicalize_to_upper(dirent[0]);
1640      dst_slash[1] = ':';
1641      dst_slash[2] = '/';
1642      dst_slash[3] = '\0';
1643
1644      return dst_slash;
1645    }
1646#endif /* SVN_USE_DOS_PATHS */
1647
1648  return dst;
1649}
1650
1651svn_boolean_t
1652svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1653{
1654  const char *ptr = dirent;
1655  if (*ptr == '/')
1656    {
1657      ptr++;
1658#ifdef SVN_USE_DOS_PATHS
1659      /* Check for UNC paths */
1660      if (*ptr == '/')
1661        {
1662          /* TODO: Scan hostname and sharename and fall back to part code */
1663
1664          /* ### Fall back to old implementation */
1665          return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1666                  == 0);
1667        }
1668#endif /* SVN_USE_DOS_PATHS */
1669    }
1670#ifdef SVN_USE_DOS_PATHS
1671  else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1672           (ptr[1] == ':'))
1673    {
1674      /* The only canonical drive names are "A:"..."Z:", no lower case */
1675      if (*ptr < 'A' || *ptr > 'Z')
1676        return FALSE;
1677
1678      ptr += 2;
1679
1680      if (*ptr == '/')
1681        ptr++;
1682    }
1683#endif /* SVN_USE_DOS_PATHS */
1684
1685  return relpath_is_canonical(ptr);
1686}
1687
1688static svn_boolean_t
1689relpath_is_canonical(const char *relpath)
1690{
1691  const char *ptr = relpath, *seg = relpath;
1692
1693  /* RELPATH is canonical if it has:
1694   *  - no '.' segments
1695   *  - no start and closing '/'
1696   *  - no '//'
1697   */
1698
1699  if (*relpath == '\0')
1700    return TRUE;
1701
1702  if (*ptr == '/')
1703    return FALSE;
1704
1705  /* Now validate the rest of the path. */
1706  while(1)
1707    {
1708      apr_size_t seglen = ptr - seg;
1709
1710      if (seglen == 1 && *seg == '.')
1711        return FALSE;  /*  /./   */
1712
1713      if (*ptr == '/' && *(ptr+1) == '/')
1714        return FALSE;  /*  //    */
1715
1716      if (! *ptr && *(ptr - 1) == '/')
1717        return FALSE;  /* foo/  */
1718
1719      if (! *ptr)
1720        break;
1721
1722      if (*ptr == '/')
1723        ptr++;
1724      seg = ptr;
1725
1726      while (*ptr && (*ptr != '/'))
1727        ptr++;
1728    }
1729
1730  return TRUE;
1731}
1732
1733svn_boolean_t
1734svn_relpath_is_canonical(const char *relpath)
1735{
1736  return relpath_is_canonical(relpath);
1737}
1738
1739svn_boolean_t
1740svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1741{
1742  const char *ptr = uri, *seg = uri;
1743  const char *schema_data = NULL;
1744
1745  /* URI is canonical if it has:
1746   *  - lowercase URL scheme
1747   *  - lowercase URL hostname
1748   *  - no '.' segments
1749   *  - no closing '/'
1750   *  - no '//'
1751   *  - uppercase hex-encoded pair digits ("%AB", not "%ab")
1752   */
1753
1754  if (*uri == '\0')
1755    return FALSE;
1756
1757  if (! svn_path_is_url(uri))
1758    return FALSE;
1759
1760  /* Skip the scheme. */
1761  while (*ptr && (*ptr != '/') && (*ptr != ':'))
1762    ptr++;
1763
1764  /* No scheme?  No good. */
1765  if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1766    return FALSE;
1767
1768  /* Found a scheme, check that it's all lowercase. */
1769  ptr = uri;
1770  while (*ptr != ':')
1771    {
1772      if (*ptr >= 'A' && *ptr <= 'Z')
1773        return FALSE;
1774      ptr++;
1775    }
1776  /* Skip :// */
1777  ptr += 3;
1778
1779  /* Scheme only?  That works. */
1780  if (! *ptr)
1781    return TRUE;
1782
1783  /* This might be the hostname */
1784  seg = ptr;
1785  while (*ptr && (*ptr != '/') && (*ptr != '@'))
1786    ptr++;
1787
1788  if (*ptr == '@')
1789    seg = ptr + 1;
1790
1791  /* Found a hostname, check that it's all lowercase. */
1792  ptr = seg;
1793
1794  if (*ptr == '[')
1795    {
1796      ptr++;
1797      while (*ptr == ':'
1798             || (*ptr >= '0' && *ptr <= '9')
1799             || (*ptr >= 'a' && *ptr <= 'f'))
1800        {
1801          ptr++;
1802        }
1803
1804      if (*ptr != ']')
1805        return FALSE;
1806      ptr++;
1807    }
1808  else
1809    while (*ptr && *ptr != '/' && *ptr != ':')
1810      {
1811        if (*ptr >= 'A' && *ptr <= 'Z')
1812          return FALSE;
1813        ptr++;
1814      }
1815
1816  /* Found a portnumber */
1817  if (*ptr == ':')
1818    {
1819      apr_int64_t port = 0;
1820
1821      ptr++;
1822      schema_data = ptr;
1823
1824      while (*ptr >= '0' && *ptr <= '9')
1825        {
1826          port = 10 * port + (*ptr - '0');
1827          ptr++;
1828        }
1829
1830      if (ptr == schema_data)
1831        return FALSE; /* Fail on "http://host:" */
1832
1833      if (*ptr && *ptr != '/')
1834        return FALSE; /* Not a port number */
1835
1836      if (port == 80 && strncmp(uri, "http:", 5) == 0)
1837        return FALSE;
1838      else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1839        return FALSE;
1840      else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1841        return FALSE;
1842    }
1843
1844  schema_data = ptr;
1845
1846#ifdef SVN_USE_DOS_PATHS
1847  if (schema_data && *ptr == '/')
1848    {
1849      /* If this is a file url, ptr now points to the third '/' in
1850         file:///C:/path. Check that if we have such a URL the drive
1851         letter is in uppercase. */
1852      if (strncmp(uri, "file:", 5) == 0 &&
1853          ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1854          *(ptr+2) == ':')
1855        return FALSE;
1856    }
1857#endif /* SVN_USE_DOS_PATHS */
1858
1859  /* Now validate the rest of the URI. */
1860  while(1)
1861    {
1862      apr_size_t seglen = ptr - seg;
1863
1864      if (seglen == 1 && *seg == '.')
1865        return FALSE;  /*  /./   */
1866
1867      if (*ptr == '/' && *(ptr+1) == '/')
1868        return FALSE;  /*  //    */
1869
1870      if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1871        return FALSE;  /* foo/  */
1872
1873      if (! *ptr)
1874        break;
1875
1876      if (*ptr == '/')
1877        ptr++;
1878      seg = ptr;
1879
1880
1881      while (*ptr && (*ptr != '/'))
1882        ptr++;
1883    }
1884
1885  ptr = schema_data;
1886
1887  while (*ptr)
1888    {
1889      if (*ptr == '%')
1890        {
1891          char digitz[3];
1892          int val;
1893
1894          /* Can't usesvn_ctype_isxdigit() because lower case letters are
1895             not in our canonical format */
1896          if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1897              && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1898            return FALSE;
1899          else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1900                   && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1901            return FALSE;
1902
1903          digitz[0] = *(++ptr);
1904          digitz[1] = *(++ptr);
1905          digitz[2] = '\0';
1906          val = (int)strtol(digitz, NULL, 16);
1907
1908          if (svn_uri__char_validity[val])
1909            return FALSE; /* Should not have been escaped */
1910        }
1911      else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1912        return FALSE; /* Character should have been escaped */
1913      ptr++;
1914    }
1915
1916  return TRUE;
1917}
1918
1919svn_error_t *
1920svn_dirent_condense_targets(const char **pcommon,
1921                            apr_array_header_t **pcondensed_targets,
1922                            const apr_array_header_t *targets,
1923                            svn_boolean_t remove_redundancies,
1924                            apr_pool_t *result_pool,
1925                            apr_pool_t *scratch_pool)
1926{
1927  int i, num_condensed = targets->nelts;
1928  svn_boolean_t *removed;
1929  apr_array_header_t *abs_targets;
1930
1931  /* Early exit when there's no data to work on. */
1932  if (targets->nelts <= 0)
1933    {
1934      *pcommon = NULL;
1935      if (pcondensed_targets)
1936        *pcondensed_targets = NULL;
1937      return SVN_NO_ERROR;
1938    }
1939
1940  /* Get the absolute path of the first target. */
1941  SVN_ERR(svn_dirent_get_absolute(pcommon,
1942                                  APR_ARRAY_IDX(targets, 0, const char *),
1943                                  scratch_pool));
1944
1945  /* Early exit when there's only one dirent to work on. */
1946  if (targets->nelts == 1)
1947    {
1948      *pcommon = apr_pstrdup(result_pool, *pcommon);
1949      if (pcondensed_targets)
1950        *pcondensed_targets = apr_array_make(result_pool, 0,
1951                                             sizeof(const char *));
1952      return SVN_NO_ERROR;
1953    }
1954
1955  /* Copy the targets array, but with absolute dirents instead of
1956     relative.  Also, find the pcommon argument by finding what is
1957     common in all of the absolute dirents. NOTE: This is not as
1958     efficient as it could be.  The calculation of the basedir could
1959     be done in the loop below, which would save some calls to
1960     svn_dirent_get_longest_ancestor.  I decided to do it this way
1961     because I thought it would be simpler, since this way, we don't
1962     even do the loop if we don't need to condense the targets. */
1963
1964  removed = apr_pcalloc(scratch_pool, (targets->nelts *
1965                                          sizeof(svn_boolean_t)));
1966  abs_targets = apr_array_make(scratch_pool, targets->nelts,
1967                               sizeof(const char *));
1968
1969  APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
1970
1971  for (i = 1; i < targets->nelts; ++i)
1972    {
1973      const char *rel = APR_ARRAY_IDX(targets, i, const char *);
1974      const char *absolute;
1975      SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
1976      APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
1977      *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
1978                                                 scratch_pool);
1979    }
1980
1981  *pcommon = apr_pstrdup(result_pool, *pcommon);
1982
1983  if (pcondensed_targets != NULL)
1984    {
1985      size_t basedir_len;
1986
1987      if (remove_redundancies)
1988        {
1989          /* Find the common part of each pair of targets.  If
1990             common part is equal to one of the dirents, the other
1991             is a child of it, and can be removed.  If a target is
1992             equal to *pcommon, it can also be removed. */
1993
1994          /* First pass: when one non-removed target is a child of
1995             another non-removed target, remove the child. */
1996          for (i = 0; i < abs_targets->nelts; ++i)
1997            {
1998              int j;
1999
2000              if (removed[i])
2001                continue;
2002
2003              for (j = i + 1; j < abs_targets->nelts; ++j)
2004                {
2005                  const char *abs_targets_i;
2006                  const char *abs_targets_j;
2007                  const char *ancestor;
2008
2009                  if (removed[j])
2010                    continue;
2011
2012                  abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2013                  abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2014
2015                  ancestor = svn_dirent_get_longest_ancestor
2016                    (abs_targets_i, abs_targets_j, scratch_pool);
2017
2018                  if (*ancestor == '\0')
2019                    continue;
2020
2021                  if (strcmp(ancestor, abs_targets_i) == 0)
2022                    {
2023                      removed[j] = TRUE;
2024                      num_condensed--;
2025                    }
2026                  else if (strcmp(ancestor, abs_targets_j) == 0)
2027                    {
2028                      removed[i] = TRUE;
2029                      num_condensed--;
2030                    }
2031                }
2032            }
2033
2034          /* Second pass: when a target is the same as *pcommon,
2035             remove the target. */
2036          for (i = 0; i < abs_targets->nelts; ++i)
2037            {
2038              const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2039                                                        const char *);
2040
2041              if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2042                {
2043                  removed[i] = TRUE;
2044                  num_condensed--;
2045                }
2046            }
2047        }
2048
2049      /* Now create the return array, and copy the non-removed items */
2050      basedir_len = strlen(*pcommon);
2051      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2052                                           sizeof(const char *));
2053
2054      for (i = 0; i < abs_targets->nelts; ++i)
2055        {
2056          const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2057
2058          /* Skip this if it's been removed. */
2059          if (removed[i])
2060            continue;
2061
2062          /* If a common prefix was found, condensed_targets are given
2063             relative to that prefix.  */
2064          if (basedir_len > 0)
2065            {
2066              /* Only advance our pointer past a dirent separator if
2067                 REL_ITEM isn't the same as *PCOMMON.
2068
2069                 If *PCOMMON is a root dirent, basedir_len will already
2070                 include the closing '/', so never advance the pointer
2071                 here.
2072                 */
2073              rel_item += basedir_len;
2074              if (rel_item[0] &&
2075                  ! svn_dirent_is_root(*pcommon, basedir_len))
2076                rel_item++;
2077            }
2078
2079          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2080            = apr_pstrdup(result_pool, rel_item);
2081        }
2082    }
2083
2084  return SVN_NO_ERROR;
2085}
2086
2087svn_error_t *
2088svn_uri_condense_targets(const char **pcommon,
2089                         apr_array_header_t **pcondensed_targets,
2090                         const apr_array_header_t *targets,
2091                         svn_boolean_t remove_redundancies,
2092                         apr_pool_t *result_pool,
2093                         apr_pool_t *scratch_pool)
2094{
2095  int i, num_condensed = targets->nelts;
2096  apr_array_header_t *uri_targets;
2097  svn_boolean_t *removed;
2098
2099  /* Early exit when there's no data to work on. */
2100  if (targets->nelts <= 0)
2101    {
2102      *pcommon = NULL;
2103      if (pcondensed_targets)
2104        *pcondensed_targets = NULL;
2105      return SVN_NO_ERROR;
2106    }
2107
2108  *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2109                                  scratch_pool);
2110
2111  /* Early exit when there's only one uri to work on. */
2112  if (targets->nelts == 1)
2113    {
2114      *pcommon = apr_pstrdup(result_pool, *pcommon);
2115      if (pcondensed_targets)
2116        *pcondensed_targets = apr_array_make(result_pool, 0,
2117                                             sizeof(const char *));
2118      return SVN_NO_ERROR;
2119    }
2120
2121  /* Find the pcommon argument by finding what is common in all of the
2122     uris. NOTE: This is not as efficient as it could be.  The calculation
2123     of the basedir could be done in the loop below, which would
2124     save some calls to svn_uri_get_longest_ancestor.  I decided to do it
2125     this way because I thought it would be simpler, since this way, we don't
2126     even do the loop if we don't need to condense the targets. */
2127
2128  removed = apr_pcalloc(scratch_pool, (targets->nelts *
2129                                          sizeof(svn_boolean_t)));
2130  uri_targets = apr_array_make(scratch_pool, targets->nelts,
2131                               sizeof(const char *));
2132
2133  APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2134
2135  for (i = 1; i < targets->nelts; ++i)
2136    {
2137      const char *uri = svn_uri_canonicalize(
2138                           APR_ARRAY_IDX(targets, i, const char *),
2139                           scratch_pool);
2140      APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2141
2142      /* If the commonmost ancestor so far is empty, there's no point
2143         in continuing to search for a common ancestor at all.  But
2144         we'll keep looping for the sake of canonicalizing the
2145         targets, I suppose.  */
2146      if (**pcommon != '\0')
2147        *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2148                                                scratch_pool);
2149    }
2150
2151  *pcommon = apr_pstrdup(result_pool, *pcommon);
2152
2153  if (pcondensed_targets != NULL)
2154    {
2155      size_t basedir_len;
2156
2157      if (remove_redundancies)
2158        {
2159          /* Find the common part of each pair of targets.  If
2160             common part is equal to one of the dirents, the other
2161             is a child of it, and can be removed.  If a target is
2162             equal to *pcommon, it can also be removed. */
2163
2164          /* First pass: when one non-removed target is a child of
2165             another non-removed target, remove the child. */
2166          for (i = 0; i < uri_targets->nelts; ++i)
2167            {
2168              int j;
2169
2170              if (removed[i])
2171                continue;
2172
2173              for (j = i + 1; j < uri_targets->nelts; ++j)
2174                {
2175                  const char *uri_i;
2176                  const char *uri_j;
2177                  const char *ancestor;
2178
2179                  if (removed[j])
2180                    continue;
2181
2182                  uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2183                  uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2184
2185                  ancestor = svn_uri_get_longest_ancestor(uri_i,
2186                                                          uri_j,
2187                                                          scratch_pool);
2188
2189                  if (*ancestor == '\0')
2190                    continue;
2191
2192                  if (strcmp(ancestor, uri_i) == 0)
2193                    {
2194                      removed[j] = TRUE;
2195                      num_condensed--;
2196                    }
2197                  else if (strcmp(ancestor, uri_j) == 0)
2198                    {
2199                      removed[i] = TRUE;
2200                      num_condensed--;
2201                    }
2202                }
2203            }
2204
2205          /* Second pass: when a target is the same as *pcommon,
2206             remove the target. */
2207          for (i = 0; i < uri_targets->nelts; ++i)
2208            {
2209              const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2210                                                        const char *);
2211
2212              if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2213                {
2214                  removed[i] = TRUE;
2215                  num_condensed--;
2216                }
2217            }
2218        }
2219
2220      /* Now create the return array, and copy the non-removed items */
2221      basedir_len = strlen(*pcommon);
2222      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2223                                           sizeof(const char *));
2224
2225      for (i = 0; i < uri_targets->nelts; ++i)
2226        {
2227          const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2228
2229          /* Skip this if it's been removed. */
2230          if (removed[i])
2231            continue;
2232
2233          /* If a common prefix was found, condensed_targets are given
2234             relative to that prefix.  */
2235          if (basedir_len > 0)
2236            {
2237              /* Only advance our pointer past a dirent separator if
2238                 REL_ITEM isn't the same as *PCOMMON.
2239
2240                 If *PCOMMON is a root dirent, basedir_len will already
2241                 include the closing '/', so never advance the pointer
2242                 here.
2243                 */
2244              rel_item += basedir_len;
2245              if ((rel_item[0] == '/') ||
2246                  (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2247                {
2248                  rel_item++;
2249                }
2250            }
2251
2252          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2253            = svn_path_uri_decode(rel_item, result_pool);
2254        }
2255    }
2256
2257  return SVN_NO_ERROR;
2258}
2259
2260svn_error_t *
2261svn_dirent_is_under_root(svn_boolean_t *under_root,
2262                         const char **result_path,
2263                         const char *base_path,
2264                         const char *path,
2265                         apr_pool_t *result_pool)
2266{
2267  apr_status_t status;
2268  char *full_path;
2269
2270  *under_root = FALSE;
2271  if (result_path)
2272    *result_path = NULL;
2273
2274  status = apr_filepath_merge(&full_path,
2275                              base_path,
2276                              path,
2277                              APR_FILEPATH_NOTABOVEROOT
2278                              | APR_FILEPATH_SECUREROOTTEST,
2279                              result_pool);
2280
2281  if (status == APR_SUCCESS)
2282    {
2283      if (result_path)
2284        *result_path = svn_dirent_canonicalize(full_path, result_pool);
2285      *under_root = TRUE;
2286      return SVN_NO_ERROR;
2287    }
2288  else if (status == APR_EABOVEROOT)
2289    {
2290      *under_root = FALSE;
2291      return SVN_NO_ERROR;
2292    }
2293
2294  return svn_error_wrap_apr(status, NULL);
2295}
2296
2297svn_error_t *
2298svn_uri_get_dirent_from_file_url(const char **dirent,
2299                                 const char *url,
2300                                 apr_pool_t *pool)
2301{
2302  const char *hostname, *path;
2303
2304  SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2305
2306  /* Verify that the URL is well-formed (loosely) */
2307
2308  /* First, check for the "file://" prefix. */
2309  if (strncmp(url, "file://", 7) != 0)
2310    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2311                             _("Local URL '%s' does not contain 'file://' "
2312                               "prefix"), url);
2313
2314  /* Find the HOSTNAME portion and the PATH portion of the URL.  The host
2315     name is between the "file://" prefix and the next occurence of '/'.  We
2316     are considering everything from that '/' until the end of the URL to be
2317     the absolute path portion of the URL.
2318     If we got just "file://", treat it the same as "file:///". */
2319  hostname = url + 7;
2320  path = strchr(hostname, '/');
2321  if (path)
2322    hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2323  else
2324    path = "/";
2325
2326  /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2327  if (*hostname == '\0')
2328    hostname = NULL;
2329  else
2330    {
2331      hostname = svn_path_uri_decode(hostname, pool);
2332      if (strcmp(hostname, "localhost") == 0)
2333        hostname = NULL;
2334    }
2335
2336  /* Duplicate the URL, starting at the top of the path.
2337     At the same time, we URI-decode the path. */
2338#ifdef SVN_USE_DOS_PATHS
2339  /* On Windows, we'll typically have to skip the leading / if the
2340     path starts with a drive letter.  Like most Web browsers, We
2341     support two variants of this scheme:
2342
2343         file:///X:/path    and
2344         file:///X|/path
2345
2346    Note that, at least on WinNT and above,  file:////./X:/path  will
2347    also work, so we must make sure the transformation doesn't break
2348    that, and  file:///path  (that looks within the current drive
2349    only) should also keep working.
2350    If we got a non-empty hostname other than localhost, we convert this
2351    into an UNC path.  In this case, we obviously don't strip the slash
2352    even if the path looks like it starts with a drive letter.
2353  */
2354  {
2355    static const char valid_drive_letters[] =
2356      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2357    /* Casting away const! */
2358    char *dup_path = (char *)svn_path_uri_decode(path, pool);
2359
2360    /* This check assumes ':' and '|' are already decoded! */
2361    if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2362        && (dup_path[2] == ':' || dup_path[2] == '|'))
2363      {
2364        /* Skip the leading slash. */
2365        ++dup_path;
2366
2367        if (dup_path[1] == '|')
2368          dup_path[1] = ':';
2369
2370        if (dup_path[2] == '/' || dup_path[2] == '\0')
2371          {
2372            if (dup_path[2] == '\0')
2373              {
2374                /* A valid dirent for the driveroot must be like "C:/" instead of
2375                   just "C:" or svn_dirent_join() will use the current directory
2376                   on the drive instead */
2377                char *new_path = apr_pcalloc(pool, 4);
2378                new_path[0] = dup_path[0];
2379                new_path[1] = ':';
2380                new_path[2] = '/';
2381                new_path[3] = '\0';
2382                dup_path = new_path;
2383              }
2384          }
2385      }
2386    if (hostname)
2387      {
2388        if (dup_path[0] == '/' && dup_path[1] == '\0')
2389          return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2390                                   _("Local URL '%s' contains only a hostname, "
2391                                     "no path"), url);
2392
2393        /* We still know that the path starts with a slash. */
2394        *dirent = apr_pstrcat(pool, "//", hostname, dup_path, NULL);
2395      }
2396    else
2397      *dirent = dup_path;
2398  }
2399#else /* !SVN_USE_DOS_PATHS */
2400  /* Currently, the only hostnames we are allowing on non-Win32 platforms
2401     are the empty string and 'localhost'. */
2402  if (hostname)
2403    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2404                             _("Local URL '%s' contains unsupported hostname"),
2405                             url);
2406
2407  *dirent = svn_path_uri_decode(path, pool);
2408#endif /* SVN_USE_DOS_PATHS */
2409  return SVN_NO_ERROR;
2410}
2411
2412svn_error_t *
2413svn_uri_get_file_url_from_dirent(const char **url,
2414                                 const char *dirent,
2415                                 apr_pool_t *pool)
2416{
2417  assert(svn_dirent_is_canonical(dirent, pool));
2418
2419  SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2420
2421  dirent = svn_path_uri_encode(dirent, pool);
2422
2423#ifndef SVN_USE_DOS_PATHS
2424  if (dirent[0] == '/' && dirent[1] == '\0')
2425    dirent = NULL; /* "file://" is the canonical form of "file:///" */
2426
2427  *url = apr_pstrcat(pool, "file://", dirent, (char *)NULL);
2428#else
2429  if (dirent[0] == '/')
2430    {
2431      /* Handle UNC paths //server/share -> file://server/share */
2432      assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2433
2434      *url = apr_pstrcat(pool, "file:", dirent, NULL);
2435    }
2436  else
2437    {
2438      char *uri = apr_pstrcat(pool, "file:///", dirent, NULL);
2439      apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2440
2441      /* "C:/" is a canonical dirent on Windows,
2442         but "file:///C:/" is not a canonical uri */
2443      if (uri[len-1] == '/')
2444        uri[len-1] = '\0';
2445
2446      *url = uri;
2447    }
2448#endif
2449
2450  return SVN_NO_ERROR;
2451}
2452
2453
2454
2455/* -------------- The fspath API (see private/svn_fspath.h) -------------- */
2456
2457svn_boolean_t
2458svn_fspath__is_canonical(const char *fspath)
2459{
2460  return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
2461}
2462
2463
2464const char *
2465svn_fspath__canonicalize(const char *fspath,
2466                         apr_pool_t *pool)
2467{
2468  if ((fspath[0] == '/') && (fspath[1] == '\0'))
2469    return "/";
2470
2471  return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
2472                     (char *)NULL);
2473}
2474
2475
2476svn_boolean_t
2477svn_fspath__is_root(const char *fspath, apr_size_t len)
2478{
2479  /* directory is root if it's equal to '/' */
2480  return (len == 1 && fspath[0] == '/');
2481}
2482
2483
/* Apply svn_relpath_skip_ancestor() to PARENT_FSPATH and CHILD_FSPATH
   with their leading '/' stripped, and return its result.  Both
   arguments must be canonical fspaths (checked with assert()). */
const char *
svn_fspath__skip_ancestor(const char *parent_fspath,
                          const char *child_fspath)
{
  const char *parent_relpath;
  const char *child_relpath;

  assert(svn_fspath__is_canonical(parent_fspath));
  assert(svn_fspath__is_canonical(child_fspath));

  /* Step past the leading slash so the relpath API can do the work. */
  parent_relpath = parent_fspath + 1;
  child_relpath = child_fspath + 1;

  return svn_relpath_skip_ancestor(parent_relpath, child_relpath);
}
2493
2494
2495const char *
2496svn_fspath__dirname(const char *fspath,
2497                    apr_pool_t *pool)
2498{
2499  assert(svn_fspath__is_canonical(fspath));
2500
2501  if (fspath[0] == '/' && fspath[1] == '\0')
2502    return apr_pstrdup(pool, fspath);
2503  else
2504    return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
2505                       (char *)NULL);
2506}
2507
2508
2509const char *
2510svn_fspath__basename(const char *fspath,
2511                     apr_pool_t *pool)
2512{
2513  const char *result;
2514  assert(svn_fspath__is_canonical(fspath));
2515
2516  result = svn_relpath_basename(fspath + 1, pool);
2517
2518  assert(strchr(result, '/') == NULL);
2519  return result;
2520}
2521
2522void
2523svn_fspath__split(const char **dirpath,
2524                  const char **base_name,
2525                  const char *fspath,
2526                  apr_pool_t *result_pool)
2527{
2528  assert(dirpath != base_name);
2529
2530  if (dirpath)
2531    *dirpath = svn_fspath__dirname(fspath, result_pool);
2532
2533  if (base_name)
2534    *base_name = svn_fspath__basename(fspath, result_pool);
2535}
2536
2537char *
2538svn_fspath__join(const char *fspath,
2539                 const char *relpath,
2540                 apr_pool_t *result_pool)
2541{
2542  char *result;
2543  assert(svn_fspath__is_canonical(fspath));
2544  assert(svn_relpath_is_canonical(relpath));
2545
2546  if (relpath[0] == '\0')
2547    result = apr_pstrdup(result_pool, fspath);
2548  else if (fspath[1] == '\0')
2549    result = apr_pstrcat(result_pool, "/", relpath, (char *)NULL);
2550  else
2551    result = apr_pstrcat(result_pool, fspath, "/", relpath, (char *)NULL);
2552
2553  assert(svn_fspath__is_canonical(result));
2554  return result;
2555}
2556
2557char *
2558svn_fspath__get_longest_ancestor(const char *fspath1,
2559                                 const char *fspath2,
2560                                 apr_pool_t *result_pool)
2561{
2562  char *result;
2563  assert(svn_fspath__is_canonical(fspath1));
2564  assert(svn_fspath__is_canonical(fspath2));
2565
2566  result = apr_pstrcat(result_pool, "/",
2567                       svn_relpath_get_longest_ancestor(fspath1 + 1,
2568                                                        fspath2 + 1,
2569                                                        result_pool),
2570                       (char *)NULL);
2571
2572  assert(svn_fspath__is_canonical(result));
2573  return result;
2574}
2575
2576
2577
2578
2579/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
2580
2581const char *
2582svn_urlpath__canonicalize(const char *uri,
2583                          apr_pool_t *pool)
2584{
2585  if (svn_path_is_url(uri))
2586    {
2587      uri = svn_uri_canonicalize(uri, pool);
2588    }
2589  else
2590    {
2591      uri = svn_fspath__canonicalize(uri, pool);
2592      /* Do a little dance to normalize hex encoding. */
2593      uri = svn_path_uri_decode(uri, pool);
2594      uri = svn_path_uri_encode(uri, pool);
2595    }
2596  return uri;
2597}
2598