dirent_uri.c revision 269847
1/*
2 * dirent_uri.c:   a library to manipulate URIs and directory entries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28#include <ctype.h>
29
30#include <apr_uri.h>
31#include <apr_lib.h>
32
33#include "svn_private_config.h"
34#include "svn_string.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_ctype.h"
38
39#include "dirent_uri.h"
40#include "private/svn_fspath.h"
41#include "private/svn_cert.h"
42
/* The canonical empty path.  If this representation is ever changed,
   SVN_PATH_IS_EMPTY below must be changed to match; the path library
   would then keep working, but it is not certain that the fs/wc
   libraries would. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path (a zero-length string), FALSE
   otherwise.  S is evaluated exactly once. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."), FALSE
   otherwise.  Changing this would keep the path library working, but
   the operating system may not agree. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')

/* Enable DOS-style path handling (drive letters, UNC paths) on the
   platforms listed below.  This check must match the check on top of
   dirent_uri-tests.c and path-tests.c */
#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
#define SVN_USE_DOS_PATHS
#endif
61
/* Path type definition. Used only by internal functions to select the
   rule set applied by canonicalize(), is_child() and
   get_longest_ancestor_length(). */
typedef enum path_type_t {
  type_uri,      /* a URI; "scheme://" prefixes are recognized */
  type_dirent,   /* a local directory entry; may be rooted */
  type_relpath   /* a relative path; no root is copied when canonicalizing */
} path_type_t;
68
69
70/**** Forward declarations *****/
71
72static svn_boolean_t
73relpath_is_canonical(const char *relpath);
74
75
76/**** Internal implementation functions *****/
77
78/* Return an internal-style new path based on PATH, allocated in POOL.
79 *
80 * "Internal-style" means that separators are all '/'.
81 */
82static const char *
83internal_style(const char *path, apr_pool_t *pool)
84{
85#if '/' != SVN_PATH_LOCAL_SEPARATOR
86    {
87      char *p = apr_pstrdup(pool, path);
88      path = p;
89
90      /* Convert all local-style separators to the canonical ones. */
91      for (; *p != '\0'; ++p)
92        if (*p == SVN_PATH_LOCAL_SEPARATOR)
93          *p = '/';
94    }
95#endif
96
97  return path;
98}
99
/* Locale-independent replacement for tolower(): map 'A'..'Z' to
   'a'..'z' and return every other character unchanged.  Used while
   canonicalizing parts of dirents and URLs. */
static char
canonicalize_to_lower(char c)
{
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
110
/* Locale-independent replacement for toupper(): map 'a'..'z' to
   'A'..'Z' and return every other character unchanged.  Used while
   canonicalizing parts of dirents and URLs. */
static char
canonicalize_to_upper(char c)
{
  return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}
121
122/* Calculates the length of the dirent absolute or non absolute root in
123   DIRENT, return 0 if dirent is not rooted  */
124static apr_size_t
125dirent_root_length(const char *dirent, apr_size_t len)
126{
127#ifdef SVN_USE_DOS_PATHS
128  if (len >= 2 && dirent[1] == ':' &&
129      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
130       (dirent[0] >= 'a' && dirent[0] <= 'z')))
131    {
132      return (len > 2 && dirent[2] == '/') ? 3 : 2;
133    }
134
135  if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
136    {
137      apr_size_t i = 2;
138
139      while (i < len && dirent[i] != '/')
140        i++;
141
142      if (i == len)
143        return len; /* Cygwin drive alias, invalid path on WIN32 */
144
145      i++; /* Skip '/' */
146
147      while (i < len && dirent[i] != '/')
148        i++;
149
150      return i;
151    }
152#endif /* SVN_USE_DOS_PATHS */
153  if (len >= 1 && dirent[0] == '/')
154    return 1;
155
156  return 0;
157}
158
159
160/* Return the length of substring necessary to encompass the entire
161 * previous dirent segment in DIRENT, which should be a LEN byte string.
162 *
163 * A trailing slash will not be included in the returned length except
164 * in the case in which DIRENT is absolute and there are no more
165 * previous segments.
166 */
167static apr_size_t
168dirent_previous_segment(const char *dirent,
169                        apr_size_t len)
170{
171  if (len == 0)
172    return 0;
173
174  --len;
175  while (len > 0 && dirent[len] != '/'
176#ifdef SVN_USE_DOS_PATHS
177                 && (dirent[len] != ':' || len != 1)
178#endif /* SVN_USE_DOS_PATHS */
179        )
180    --len;
181
182  /* check if the remaining segment including trailing '/' is a root dirent */
183  if (dirent_root_length(dirent, len+1) == len + 1)
184    return len + 1;
185  else
186    return len;
187}
188
189/* Calculates the length occupied by the schema defined root of URI */
190static apr_size_t
191uri_schema_root_length(const char *uri, apr_size_t len)
192{
193  apr_size_t i;
194
195  for (i = 0; i < len; i++)
196    {
197      if (uri[i] == '/')
198        {
199          if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
200            {
201              /* We have an absolute uri */
202              if (i == 5 && strncmp("file", uri, 4) == 0)
203                return 7; /* file:// */
204              else
205                {
206                  for (i += 2; i < len; i++)
207                    if (uri[i] == '/')
208                      return i;
209
210                  return len; /* Only a hostname is found */
211                }
212            }
213          else
214            return 0;
215        }
216    }
217
218  return 0;
219}
220
221/* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
222   a non absolute root. (E.g. '/' or 'F:' on Windows) */
223static svn_boolean_t
224dirent_is_rooted(const char *dirent)
225{
226  if (! dirent)
227    return FALSE;
228
229  /* Root on all systems */
230  if (dirent[0] == '/')
231    return TRUE;
232
233  /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
234     where 'H' is any letter. */
235#ifdef SVN_USE_DOS_PATHS
236  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
237       (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
238      (dirent[1] == ':'))
239     return TRUE;
240#endif /* SVN_USE_DOS_PATHS */
241
242  return FALSE;
243}
244
245/* Return the length of substring necessary to encompass the entire
246 * previous relpath segment in RELPATH, which should be a LEN byte string.
247 *
248 * A trailing slash will not be included in the returned length.
249 */
250static apr_size_t
251relpath_previous_segment(const char *relpath,
252                         apr_size_t len)
253{
254  if (len == 0)
255    return 0;
256
257  --len;
258  while (len > 0 && relpath[len] != '/')
259    --len;
260
261  return len;
262}
263
264/* Return the length of substring necessary to encompass the entire
265 * previous uri segment in URI, which should be a LEN byte string.
266 *
267 * A trailing slash will not be included in the returned length except
268 * in the case in which URI is absolute and there are no more
269 * previous segments.
270 */
271static apr_size_t
272uri_previous_segment(const char *uri,
273                     apr_size_t len)
274{
275  apr_size_t root_length;
276  apr_size_t i = len;
277  if (len == 0)
278    return 0;
279
280  root_length = uri_schema_root_length(uri, len);
281
282  --i;
283  while (len > root_length && uri[i] != '/')
284    --i;
285
286  if (i == 0 && len > 1 && *uri == '/')
287    return 1;
288
289  return i;
290}
291
292/* Return the canonicalized version of PATH, of type TYPE, allocated in
293 * POOL.
294 */
295static const char *
296canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
297{
298  char *canon, *dst;
299  const char *src;
300  apr_size_t seglen;
301  apr_size_t schemelen = 0;
302  apr_size_t canon_segments = 0;
303  svn_boolean_t url = FALSE;
304  char *schema_data = NULL;
305
306  /* "" is already canonical, so just return it; note that later code
307     depends on path not being zero-length.  */
308  if (SVN_PATH_IS_EMPTY(path))
309    {
310      assert(type != type_uri);
311      return "";
312    }
313
314  dst = canon = apr_pcalloc(pool, strlen(path) + 1);
315
316  /* If this is supposed to be an URI, it should start with
317     "scheme://".  We'll copy the scheme, host name, etc. to DST and
318     set URL = TRUE. */
319  src = path;
320  if (type == type_uri)
321    {
322      assert(*src != '/');
323
324      while (*src && (*src != '/') && (*src != ':'))
325        src++;
326
327      if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
328        {
329          const char *seg;
330
331          url = TRUE;
332
333          /* Found a scheme, convert to lowercase and copy to dst. */
334          src = path;
335          while (*src != ':')
336            {
337              *(dst++) = canonicalize_to_lower((*src++));
338              schemelen++;
339            }
340          *(dst++) = ':';
341          *(dst++) = '/';
342          *(dst++) = '/';
343          src += 3;
344          schemelen += 3;
345
346          /* This might be the hostname */
347          seg = src;
348          while (*src && (*src != '/') && (*src != '@'))
349            src++;
350
351          if (*src == '@')
352            {
353              /* Copy the username & password. */
354              seglen = src - seg + 1;
355              memcpy(dst, seg, seglen);
356              dst += seglen;
357              src++;
358            }
359          else
360            src = seg;
361
362          /* Found a hostname, convert to lowercase and copy to dst. */
363          if (*src == '[')
364            {
365             *(dst++) = *(src++); /* Copy '[' */
366
367              while (*src == ':'
368                     || (*src >= '0' && (*src <= '9'))
369                     || (*src >= 'a' && (*src <= 'f'))
370                     || (*src >= 'A' && (*src <= 'F')))
371                {
372                  *(dst++) = canonicalize_to_lower((*src++));
373                }
374
375              if (*src == ']')
376                *(dst++) = *(src++); /* Copy ']' */
377            }
378          else
379            while (*src && (*src != '/') && (*src != ':'))
380              *(dst++) = canonicalize_to_lower((*src++));
381
382          if (*src == ':')
383            {
384              /* We probably have a port number: Is it a default portnumber
385                 which doesn't belong in a canonical url? */
386              if (src[1] == '8' && src[2] == '0'
387                  && (src[3]== '/'|| !src[3])
388                  && !strncmp(canon, "http:", 5))
389                {
390                  src += 3;
391                }
392              else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
393                       && (src[4]== '/'|| !src[4])
394                       && !strncmp(canon, "https:", 6))
395                {
396                  src += 4;
397                }
398              else if (src[1] == '3' && src[2] == '6'
399                       && src[3] == '9' && src[4] == '0'
400                       && (src[5]== '/'|| !src[5])
401                       && !strncmp(canon, "svn:", 4))
402                {
403                  src += 5;
404                }
405              else if (src[1] == '/' || !src[1])
406                {
407                  src += 1;
408                }
409
410              while (*src && (*src != '/'))
411                *(dst++) = canonicalize_to_lower((*src++));
412            }
413
414          /* Copy trailing slash, or null-terminator. */
415          *(dst) = *(src);
416
417          /* Move src and dst forward only if we are not
418           * at null-terminator yet. */
419          if (*src)
420            {
421              src++;
422              dst++;
423              schema_data = dst;
424            }
425
426          canon_segments = 1;
427        }
428    }
429
430  /* Copy to DST any separator or drive letter that must come before the
431     first regular path segment. */
432  if (! url && type != type_relpath)
433    {
434      src = path;
435      /* If this is an absolute path, then just copy over the initial
436         separator character. */
437      if (*src == '/')
438        {
439          *(dst++) = *(src++);
440
441#ifdef SVN_USE_DOS_PATHS
442          /* On Windows permit two leading separator characters which means an
443           * UNC path. */
444          if ((type == type_dirent) && *src == '/')
445            *(dst++) = *(src++);
446#endif /* SVN_USE_DOS_PATHS */
447        }
448#ifdef SVN_USE_DOS_PATHS
449      /* On Windows the first segment can be a drive letter, which we normalize
450         to upper case. */
451      else if (type == type_dirent &&
452               ((*src >= 'a' && *src <= 'z') ||
453                (*src >= 'A' && *src <= 'Z')) &&
454               (src[1] == ':'))
455        {
456          *(dst++) = canonicalize_to_upper(*(src++));
457          /* Leave the ':' to be processed as (or as part of) a path segment
458             by the following code block, so we need not care whether it has
459             a slash after it. */
460        }
461#endif /* SVN_USE_DOS_PATHS */
462    }
463
464  while (*src)
465    {
466      /* Parse each segment, finding the closing '/' (which might look
467         like '%2F' for URIs).  */
468      const char *next = src;
469      apr_size_t slash_len = 0;
470
471      while (*next
472             && (next[0] != '/')
473             && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
474                    canonicalize_to_upper(next[2]) == 'F')))
475        {
476          ++next;
477        }
478
479      /* Record how long our "slash" is. */
480      if (next[0] == '/')
481        slash_len = 1;
482      else if (type == type_uri && next[0] == '%')
483        slash_len = 3;
484
485      seglen = next - src;
486
487      if (seglen == 0
488          || (seglen == 1 && src[0] == '.')
489          || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
490              && canonicalize_to_upper(src[2]) == 'E'))
491        {
492          /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
493             is equivalent to '.').  */
494        }
495#ifdef SVN_USE_DOS_PATHS
496      /* If this is the first path segment of a file:// URI and it contains a
497         windows drive letter, convert the drive letter to upper case. */
498      else if (url && canon_segments == 1 && seglen == 2 &&
499               (strncmp(canon, "file:", 5) == 0) &&
500               src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
501        {
502          *(dst++) = canonicalize_to_upper(src[0]);
503          *(dst++) = ':';
504          if (*next)
505            *(dst++) = *next;
506          canon_segments++;
507        }
508#endif /* SVN_USE_DOS_PATHS */
509      else
510        {
511          /* An actual segment, append it to the destination path */
512          memcpy(dst, src, seglen);
513          dst += seglen;
514          if (slash_len)
515            *(dst++) = '/';
516          canon_segments++;
517        }
518
519      /* Skip over trailing slash to the next segment. */
520      src = next + slash_len;
521    }
522
523  /* Remove the trailing slash if there was at least one
524   * canonical segment and the last segment ends with a slash.
525   *
526   * But keep in mind that, for URLs, the scheme counts as a
527   * canonical segment -- so if path is ONLY a scheme (such
528   * as "https://") we should NOT remove the trailing slash. */
529  if ((canon_segments > 0 && *(dst - 1) == '/')
530      && ! (url && path[schemelen] == '\0'))
531    {
532      dst --;
533    }
534
535  *dst = '\0';
536
537#ifdef SVN_USE_DOS_PATHS
538  /* Skip leading double slashes when there are less than 2
539   * canon segments. UNC paths *MUST* have two segments. */
540  if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
541    {
542      if (canon_segments < 2)
543        return canon + 1;
544      else
545        {
546          /* Now we're sure this is a valid UNC path, convert the server name
547             (the first path segment) to lowercase as Windows treats it as case
548             insensitive.
549             Note: normally the share name is treated as case insensitive too,
550             but it seems to be possible to configure Samba to treat those as
551             case sensitive, so better leave that alone. */
552          for (dst = canon + 2; *dst && *dst != '/'; dst++)
553            *dst = canonicalize_to_lower(*dst);
554        }
555    }
556#endif /* SVN_USE_DOS_PATHS */
557
558  /* Check the normalization of characters in a uri */
559  if (schema_data)
560    {
561      int need_extra = 0;
562      src = schema_data;
563
564      while (*src)
565        {
566          switch (*src)
567            {
568              case '/':
569                break;
570              case '%':
571                if (!svn_ctype_isxdigit(*(src+1)) ||
572                    !svn_ctype_isxdigit(*(src+2)))
573                  need_extra += 2;
574                else
575                  src += 2;
576                break;
577              default:
578                if (!svn_uri__char_validity[(unsigned char)*src])
579                  need_extra += 2;
580                break;
581            }
582          src++;
583        }
584
585      if (need_extra > 0)
586        {
587          apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);
588
589          dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
590          memcpy(dst, canon, pre_schema_size);
591          canon = dst;
592
593          dst += pre_schema_size;
594        }
595      else
596        dst = schema_data;
597
598      src = schema_data;
599
600      while (*src)
601        {
602          switch (*src)
603            {
604              case '/':
605                *(dst++) = '/';
606                break;
607              case '%':
608                if (!svn_ctype_isxdigit(*(src+1)) ||
609                    !svn_ctype_isxdigit(*(src+2)))
610                  {
611                    *(dst++) = '%';
612                    *(dst++) = '2';
613                    *(dst++) = '5';
614                  }
615                else
616                  {
617                    char digitz[3];
618                    int val;
619
620                    digitz[0] = *(++src);
621                    digitz[1] = *(++src);
622                    digitz[2] = 0;
623
624                    val = (int)strtol(digitz, NULL, 16);
625
626                    if (svn_uri__char_validity[(unsigned char)val])
627                      *(dst++) = (char)val;
628                    else
629                      {
630                        *(dst++) = '%';
631                        *(dst++) = canonicalize_to_upper(digitz[0]);
632                        *(dst++) = canonicalize_to_upper(digitz[1]);
633                      }
634                  }
635                break;
636              default:
637                if (!svn_uri__char_validity[(unsigned char)*src])
638                  {
639                    apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
640                    dst += 3;
641                  }
642                else
643                  *(dst++) = *src;
644                break;
645            }
646          src++;
647        }
648      *dst = '\0';
649    }
650
651  return canon;
652}
653
654/* Return the string length of the longest common ancestor of PATH1 and PATH2.
655 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
656 * PATH1 and PATH2 are regular paths.
657 *
658 * If the two paths do not share a common ancestor, return 0.
659 *
660 * New strings are allocated in POOL.
661 */
662static apr_size_t
663get_longest_ancestor_length(path_type_t types,
664                            const char *path1,
665                            const char *path2,
666                            apr_pool_t *pool)
667{
668  apr_size_t path1_len, path2_len;
669  apr_size_t i = 0;
670  apr_size_t last_dirsep = 0;
671#ifdef SVN_USE_DOS_PATHS
672  svn_boolean_t unc = FALSE;
673#endif
674
675  path1_len = strlen(path1);
676  path2_len = strlen(path2);
677
678  if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
679    return 0;
680
681  while (path1[i] == path2[i])
682    {
683      /* Keep track of the last directory separator we hit. */
684      if (path1[i] == '/')
685        last_dirsep = i;
686
687      i++;
688
689      /* If we get to the end of either path, break out. */
690      if ((i == path1_len) || (i == path2_len))
691        break;
692    }
693
694  /* two special cases:
695     1. '/' is the longest common ancestor of '/' and '/foo' */
696  if (i == 1 && path1[0] == '/' && path2[0] == '/')
697    return 1;
698  /* 2. '' is the longest common ancestor of any non-matching
699   * strings 'foo' and 'bar' */
700  if (types == type_dirent && i == 0)
701    return 0;
702
703  /* Handle some windows specific cases */
704#ifdef SVN_USE_DOS_PATHS
705  if (types == type_dirent)
706    {
707      /* don't count the '//' from UNC paths */
708      if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
709        {
710          last_dirsep = 0;
711          unc = TRUE;
712        }
713
714      /* X:/ and X:/foo */
715      if (i == 3 && path1[2] == '/' && path1[1] == ':')
716        return i;
717
718      /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
719       * Note that this assertion triggers only if the code above has
720       * been broken. The code below relies on this assertion, because
721       * it uses [i - 1] as index. */
722      assert(i > 0);
723
724      /* X: and X:/ */
725      if ((path1[i - 1] == ':' && path2[i] == '/') ||
726          (path2[i - 1] == ':' && path1[i] == '/'))
727          return 0;
728      /* X: and X:foo */
729      if (path1[i - 1] == ':' || path2[i - 1] == ':')
730          return i;
731    }
732#endif /* SVN_USE_DOS_PATHS */
733
734  /* last_dirsep is now the offset of the last directory separator we
735     crossed before reaching a non-matching byte.  i is the offset of
736     that non-matching byte, and is guaranteed to be <= the length of
737     whichever path is shorter.
738     If one of the paths is the common part return that. */
739  if (((i == path1_len) && (path2[i] == '/'))
740           || ((i == path2_len) && (path1[i] == '/'))
741           || ((i == path1_len) && (i == path2_len)))
742    return i;
743  else
744    {
745      /* Nothing in common but the root folder '/' or 'X:/' for Windows
746         dirents. */
747#ifdef SVN_USE_DOS_PATHS
748      if (! unc)
749        {
750          /* X:/foo and X:/bar returns X:/ */
751          if ((types == type_dirent) &&
752              last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
753                               && path2[1] == ':' && path2[2] == '/')
754            return 3;
755#endif /* SVN_USE_DOS_PATHS */
756          if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
757            return 1;
758#ifdef SVN_USE_DOS_PATHS
759        }
760#endif
761    }
762
763  return last_dirsep;
764}
765
766/* Determine whether PATH2 is a child of PATH1.
767 *
768 * PATH2 is a child of PATH1 if
769 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
770 * or
771 * 2) PATH2 is has n components, PATH1 has x < n components,
772 *    and PATH1 matches PATH2 in all its x components.
773 *    Components are separated by a slash, '/'.
774 *
775 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
776 * PATH1 and PATH2 are regular paths.
777 *
778 * If PATH2 is not a child of PATH1, return NULL.
779 *
780 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
781 * of the child part of PATH2 in POOL and return a pointer to the
782 * newly allocated child part.
783 *
784 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
785 * pointing to the child part of PATH2.
786 * */
787static const char *
788is_child(path_type_t type, const char *path1, const char *path2,
789         apr_pool_t *pool)
790{
791  apr_size_t i;
792
793  /* Allow "" and "foo" or "H:foo" to be parent/child */
794  if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
795    {
796      if (SVN_PATH_IS_EMPTY(path2))            /* "" not a child    */
797        return NULL;
798
799      /* check if this is an absolute path */
800      if ((type == type_uri) ||
801          (type == type_dirent && dirent_is_rooted(path2)))
802        return NULL;
803      else
804        /* everything else is child */
805        return pool ? apr_pstrdup(pool, path2) : path2;
806    }
807
808  /* Reach the end of at least one of the paths.  How should we handle
809     things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
810     appear to arise in the current Subversion code, it's not clear to me
811     if they should be parent/child or not. */
812  /* Hmmm... aren't paths assumed to be canonical in this function?
813   * How can "foo///bar" even happen if the paths are canonical? */
814  for (i = 0; path1[i] && path2[i]; i++)
815    if (path1[i] != path2[i])
816      return NULL;
817
818  /* FIXME: This comment does not really match
819   * the checks made in the code it refers to: */
820  /* There are two cases that are parent/child
821          ...      path1[i] == '\0'
822          .../foo  path2[i] == '/'
823      or
824          /        path1[i] == '\0'
825          /foo     path2[i] != '/'
826
827     Other root paths (like X:/) fall under the former case:
828          X:/        path1[i] == '\0'
829          X:/foo     path2[i] != '/'
830
831     Check for '//' to avoid matching '/' and '//srv'.
832  */
833  if (path1[i] == '\0' && path2[i])
834    {
835      if (path1[i - 1] == '/'
836#ifdef SVN_USE_DOS_PATHS
837          || ((type == type_dirent) && path1[i - 1] == ':')
838#endif
839           )
840        {
841          if (path2[i] == '/')
842            /* .../
843             * ..../
844             *     i   */
845            return NULL;
846          else
847            /* .../
848             * .../foo
849             *     i    */
850            return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
851        }
852      else if (path2[i] == '/')
853        {
854          if (path2[i + 1])
855            /* ...
856             * .../foo
857             *    i    */
858            return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
859          else
860            /* ...
861             * .../
862             *    i    */
863            return NULL;
864        }
865    }
866
867  /* Otherwise, path2 isn't a child. */
868  return NULL;
869}
870
871
872/**** Public API functions ****/
873
874const char *
875svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
876{
877  return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
878}
879
880const char *
881svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
882{
883  /* Internally, Subversion represents the current directory with the
884     empty string.  But users like to see "." . */
885  if (SVN_PATH_IS_EMPTY(dirent))
886    return ".";
887
888#if '/' != SVN_PATH_LOCAL_SEPARATOR
889    {
890      char *p = apr_pstrdup(pool, dirent);
891      dirent = p;
892
893      /* Convert all canonical separators to the local-style ones. */
894      for (; *p != '\0'; ++p)
895        if (*p == '/')
896          *p = SVN_PATH_LOCAL_SEPARATOR;
897    }
898#endif
899
900  return dirent;
901}
902
903const char *
904svn_relpath__internal_style(const char *relpath,
905                            apr_pool_t *pool)
906{
907  return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
908}
909
910
911/* We decided against using apr_filepath_root here because of the negative
912   performance impact (creating a pool and converting strings ). */
913svn_boolean_t
914svn_dirent_is_root(const char *dirent, apr_size_t len)
915{
916#ifdef SVN_USE_DOS_PATHS
917  /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
918     are also root directories */
919  if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
920      (dirent[1] == ':') &&
921      ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
922       (dirent[0] >= 'a' && dirent[0] <= 'z')))
923    return TRUE;
924
925  /* On Windows and Cygwin //server/share is a root directory,
926     and on Cygwin //drive is a drive alias */
927  if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
928      && dirent[len - 1] != '/')
929    {
930      int segments = 0;
931      apr_size_t i;
932      for (i = len; i >= 2; i--)
933        {
934          if (dirent[i] == '/')
935            {
936              segments ++;
937              if (segments > 1)
938                return FALSE;
939            }
940        }
941#ifdef __CYGWIN__
942      return (segments <= 1);
943#else
944      return (segments == 1); /* //drive is invalid on plain Windows */
945#endif
946    }
947#endif
948
949  /* directory is root if it's equal to '/' */
950  if (len == 1 && dirent[0] == '/')
951    return TRUE;
952
953  return FALSE;
954}
955
956svn_boolean_t
957svn_uri_is_root(const char *uri, apr_size_t len)
958{
959  assert(svn_uri_is_canonical(uri, NULL));
960  return (len == uri_schema_root_length(uri, len));
961}
962
963char *svn_dirent_join(const char *base,
964                      const char *component,
965                      apr_pool_t *pool)
966{
967  apr_size_t blen = strlen(base);
968  apr_size_t clen = strlen(component);
969  char *dirent;
970  int add_separator;
971
972  assert(svn_dirent_is_canonical(base, pool));
973  assert(svn_dirent_is_canonical(component, pool));
974
975  /* If the component is absolute, then return it.  */
976  if (svn_dirent_is_absolute(component))
977    return apr_pmemdup(pool, component, clen + 1);
978
979  /* If either is empty return the other */
980  if (SVN_PATH_IS_EMPTY(base))
981    return apr_pmemdup(pool, component, clen + 1);
982  if (SVN_PATH_IS_EMPTY(component))
983    return apr_pmemdup(pool, base, blen + 1);
984
985#ifdef SVN_USE_DOS_PATHS
986  if (component[0] == '/')
987    {
988      /* '/' is drive relative on Windows, not absolute like on Posix */
989      if (dirent_is_rooted(base))
990        {
991          /* Join component without '/' to root-of(base) */
992          blen = dirent_root_length(base, blen);
993          component++;
994          clen--;
995
996          if (blen == 2 && base[1] == ':') /* "C:" case */
997            {
998              char *root = apr_pmemdup(pool, base, 3);
999              root[2] = '/'; /* We don't need the final '\0' */
1000
1001              base = root;
1002              blen = 3;
1003            }
1004
1005          if (clen == 0)
1006            return apr_pstrndup(pool, base, blen);
1007        }
1008      else
1009        return apr_pmemdup(pool, component, clen + 1);
1010    }
1011  else if (dirent_is_rooted(component))
1012    return apr_pmemdup(pool, component, clen + 1);
1013#endif /* SVN_USE_DOS_PATHS */
1014
1015  /* if last character of base is already a separator, don't add a '/' */
1016  add_separator = 1;
1017  if (base[blen - 1] == '/'
1018#ifdef SVN_USE_DOS_PATHS
1019       || base[blen - 1] == ':'
1020#endif
1021        )
1022          add_separator = 0;
1023
1024  /* Construct the new, combined dirent. */
1025  dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1026  memcpy(dirent, base, blen);
1027  if (add_separator)
1028    dirent[blen] = '/';
1029  memcpy(dirent + blen + add_separator, component, clen + 1);
1030
1031  return dirent;
1032}
1033
1034char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1035{
1036#define MAX_SAVED_LENGTHS 10
1037  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1038  apr_size_t total_len;
1039  int nargs;
1040  va_list va;
1041  const char *s;
1042  apr_size_t len;
1043  char *dirent;
1044  char *p;
1045  int add_separator;
1046  int base_arg = 0;
1047
1048  total_len = strlen(base);
1049
1050  assert(svn_dirent_is_canonical(base, pool));
1051
1052  /* if last character of base is already a separator, don't add a '/' */
1053  add_separator = 1;
1054  if (total_len == 0
1055       || base[total_len - 1] == '/'
1056#ifdef SVN_USE_DOS_PATHS
1057       || base[total_len - 1] == ':'
1058#endif
1059        )
1060          add_separator = 0;
1061
1062  saved_lengths[0] = total_len;
1063
1064  /* Compute the length of the resulting string. */
1065
1066  nargs = 0;
1067  va_start(va, base);
1068  while ((s = va_arg(va, const char *)) != NULL)
1069    {
1070      len = strlen(s);
1071
1072      assert(svn_dirent_is_canonical(s, pool));
1073
1074      if (SVN_PATH_IS_EMPTY(s))
1075        continue;
1076
1077      if (nargs++ < MAX_SAVED_LENGTHS)
1078        saved_lengths[nargs] = len;
1079
1080      if (dirent_is_rooted(s))
1081        {
1082          total_len = len;
1083          base_arg = nargs;
1084
1085#ifdef SVN_USE_DOS_PATHS
1086          if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1087            {
1088              /* Set new base and skip the current argument */
1089              base = s = svn_dirent_join(base, s, pool);
1090              base_arg++;
1091              saved_lengths[0] = total_len = len = strlen(s);
1092            }
1093          else
1094#endif /* SVN_USE_DOS_PATHS */
1095            {
1096              base = ""; /* Don't add base */
1097              saved_lengths[0] = 0;
1098            }
1099
1100          add_separator = 1;
1101          if (s[len - 1] == '/'
1102#ifdef SVN_USE_DOS_PATHS
1103             || s[len - 1] == ':'
1104#endif
1105              )
1106             add_separator = 0;
1107        }
1108      else if (nargs <= base_arg + 1)
1109        {
1110          total_len += add_separator + len;
1111        }
1112      else
1113        {
1114          total_len += 1 + len;
1115        }
1116    }
1117  va_end(va);
1118
1119  /* base == "/" and no further components. just return that. */
1120  if (add_separator == 0 && total_len == 1)
1121    return apr_pmemdup(pool, "/", 2);
1122
1123  /* we got the total size. allocate it, with room for a NULL character. */
1124  dirent = p = apr_palloc(pool, total_len + 1);
1125
1126  /* if we aren't supposed to skip forward to an absolute component, and if
1127     this is not an empty base that we are skipping, then copy the base
1128     into the output. */
1129  if (! SVN_PATH_IS_EMPTY(base))
1130    {
1131      memcpy(p, base, len = saved_lengths[0]);
1132      p += len;
1133    }
1134
1135  nargs = 0;
1136  va_start(va, base);
1137  while ((s = va_arg(va, const char *)) != NULL)
1138    {
1139      if (SVN_PATH_IS_EMPTY(s))
1140        continue;
1141
1142      if (++nargs < base_arg)
1143        continue;
1144
1145      if (nargs < MAX_SAVED_LENGTHS)
1146        len = saved_lengths[nargs];
1147      else
1148        len = strlen(s);
1149
1150      /* insert a separator if we aren't copying in the first component
1151         (which can happen when base_arg is set). also, don't put in a slash
1152         if the prior character is a slash (occurs when prior component
1153         is "/"). */
1154      if (p != dirent &&
1155          ( ! (nargs - 1 <= base_arg) || add_separator))
1156        *p++ = '/';
1157
1158      /* copy the new component and advance the pointer */
1159      memcpy(p, s, len);
1160      p += len;
1161    }
1162  va_end(va);
1163
1164  *p = '\0';
1165  assert((apr_size_t)(p - dirent) == total_len);
1166
1167  return dirent;
1168}
1169
1170char *
1171svn_relpath_join(const char *base,
1172                 const char *component,
1173                 apr_pool_t *pool)
1174{
1175  apr_size_t blen = strlen(base);
1176  apr_size_t clen = strlen(component);
1177  char *path;
1178
1179  assert(relpath_is_canonical(base));
1180  assert(relpath_is_canonical(component));
1181
1182  /* If either is empty return the other */
1183  if (blen == 0)
1184    return apr_pmemdup(pool, component, clen + 1);
1185  if (clen == 0)
1186    return apr_pmemdup(pool, base, blen + 1);
1187
1188  path = apr_palloc(pool, blen + 1 + clen + 1);
1189  memcpy(path, base, blen);
1190  path[blen] = '/';
1191  memcpy(path + blen + 1, component, clen + 1);
1192
1193  return path;
1194}
1195
1196char *
1197svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1198{
1199  apr_size_t len = strlen(dirent);
1200
1201  assert(svn_dirent_is_canonical(dirent, pool));
1202
1203  if (len == dirent_root_length(dirent, len))
1204    return apr_pstrmemdup(pool, dirent, len);
1205  else
1206    return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1207}
1208
1209const char *
1210svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1211{
1212  apr_size_t len = strlen(dirent);
1213  apr_size_t start;
1214
1215  assert(!pool || svn_dirent_is_canonical(dirent, pool));
1216
1217  if (svn_dirent_is_root(dirent, len))
1218    return "";
1219  else
1220    {
1221      start = len;
1222      while (start > 0 && dirent[start - 1] != '/'
1223#ifdef SVN_USE_DOS_PATHS
1224             && dirent[start - 1] != ':'
1225#endif
1226            )
1227        --start;
1228    }
1229
1230  if (pool)
1231    return apr_pstrmemdup(pool, dirent + start, len - start);
1232  else
1233    return dirent + start;
1234}
1235
1236void
1237svn_dirent_split(const char **dirpath,
1238                 const char **base_name,
1239                 const char *dirent,
1240                 apr_pool_t *pool)
1241{
1242  assert(dirpath != base_name);
1243
1244  if (dirpath)
1245    *dirpath = svn_dirent_dirname(dirent, pool);
1246
1247  if (base_name)
1248    *base_name = svn_dirent_basename(dirent, pool);
1249}
1250
1251char *
1252svn_relpath_dirname(const char *relpath,
1253                    apr_pool_t *pool)
1254{
1255  apr_size_t len = strlen(relpath);
1256
1257  assert(relpath_is_canonical(relpath));
1258
1259  return apr_pstrmemdup(pool, relpath,
1260                        relpath_previous_segment(relpath, len));
1261}
1262
1263const char *
1264svn_relpath_basename(const char *relpath,
1265                     apr_pool_t *pool)
1266{
1267  apr_size_t len = strlen(relpath);
1268  apr_size_t start;
1269
1270  assert(relpath_is_canonical(relpath));
1271
1272  start = len;
1273  while (start > 0 && relpath[start - 1] != '/')
1274    --start;
1275
1276  if (pool)
1277    return apr_pstrmemdup(pool, relpath + start, len - start);
1278  else
1279    return relpath + start;
1280}
1281
1282void
1283svn_relpath_split(const char **dirpath,
1284                  const char **base_name,
1285                  const char *relpath,
1286                  apr_pool_t *pool)
1287{
1288  assert(dirpath != base_name);
1289
1290  if (dirpath)
1291    *dirpath = svn_relpath_dirname(relpath, pool);
1292
1293  if (base_name)
1294    *base_name = svn_relpath_basename(relpath, pool);
1295}
1296
1297char *
1298svn_uri_dirname(const char *uri, apr_pool_t *pool)
1299{
1300  apr_size_t len = strlen(uri);
1301
1302  assert(svn_uri_is_canonical(uri, pool));
1303
1304  if (svn_uri_is_root(uri, len))
1305    return apr_pstrmemdup(pool, uri, len);
1306  else
1307    return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1308}
1309
1310const char *
1311svn_uri_basename(const char *uri, apr_pool_t *pool)
1312{
1313  apr_size_t len = strlen(uri);
1314  apr_size_t start;
1315
1316  assert(svn_uri_is_canonical(uri, NULL));
1317
1318  if (svn_uri_is_root(uri, len))
1319    return "";
1320
1321  start = len;
1322  while (start > 0 && uri[start - 1] != '/')
1323    --start;
1324
1325  return svn_path_uri_decode(uri + start, pool);
1326}
1327
1328void
1329svn_uri_split(const char **dirpath,
1330              const char **base_name,
1331              const char *uri,
1332              apr_pool_t *pool)
1333{
1334  assert(dirpath != base_name);
1335
1336  if (dirpath)
1337    *dirpath = svn_uri_dirname(uri, pool);
1338
1339  if (base_name)
1340    *base_name = svn_uri_basename(uri, pool);
1341}
1342
1343char *
1344svn_dirent_get_longest_ancestor(const char *dirent1,
1345                                const char *dirent2,
1346                                apr_pool_t *pool)
1347{
1348  return apr_pstrndup(pool, dirent1,
1349                      get_longest_ancestor_length(type_dirent, dirent1,
1350                                                  dirent2, pool));
1351}
1352
1353char *
1354svn_relpath_get_longest_ancestor(const char *relpath1,
1355                                 const char *relpath2,
1356                                 apr_pool_t *pool)
1357{
1358  assert(relpath_is_canonical(relpath1));
1359  assert(relpath_is_canonical(relpath2));
1360
1361  return apr_pstrndup(pool, relpath1,
1362                      get_longest_ancestor_length(type_relpath, relpath1,
1363                                                  relpath2, pool));
1364}
1365
1366char *
1367svn_uri_get_longest_ancestor(const char *uri1,
1368                             const char *uri2,
1369                             apr_pool_t *pool)
1370{
1371  apr_size_t uri_ancestor_len;
1372  apr_size_t i = 0;
1373
1374  assert(svn_uri_is_canonical(uri1, NULL));
1375  assert(svn_uri_is_canonical(uri2, NULL));
1376
1377  /* Find ':' */
1378  while (1)
1379    {
1380      /* No shared protocol => no common prefix */
1381      if (uri1[i] != uri2[i])
1382        return apr_pmemdup(pool, SVN_EMPTY_PATH,
1383                           sizeof(SVN_EMPTY_PATH));
1384
1385      if (uri1[i] == ':')
1386        break;
1387
1388      /* They're both URLs, so EOS can't come before ':' */
1389      assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1390
1391      i++;
1392    }
1393
1394  i += 3;  /* Advance past '://' */
1395
1396  uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1397                                                 uri2 + i, pool);
1398
1399  if (uri_ancestor_len == 0 ||
1400      (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1401    return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1402  else
1403    return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1404}
1405
1406const char *
1407svn_dirent_is_child(const char *parent_dirent,
1408                    const char *child_dirent,
1409                    apr_pool_t *pool)
1410{
1411  return is_child(type_dirent, parent_dirent, child_dirent, pool);
1412}
1413
1414const char *
1415svn_dirent_skip_ancestor(const char *parent_dirent,
1416                         const char *child_dirent)
1417{
1418  apr_size_t len = strlen(parent_dirent);
1419  apr_size_t root_len;
1420
1421  if (0 != strncmp(parent_dirent, child_dirent, len))
1422    return NULL; /* parent_dirent is no ancestor of child_dirent */
1423
1424  if (child_dirent[len] == 0)
1425    return ""; /* parent_dirent == child_dirent */
1426
1427  /* Child == parent + more-characters */
1428
1429  root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1430  if (root_len > len)
1431    /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1432    return NULL;
1433
1434  /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1435   * It must be one of the following forms.
1436   *
1437   * rlen parent    child       bad?  rlen=len? c[len]=/?
1438   *  0   ""        "foo"               *
1439   *  0   "b"       "bad"         !
1440   *  0   "b"       "b/foo"                       *
1441   *  1   "/"       "/foo"              *
1442   *  1   "/b"      "/bad"        !
1443   *  1   "/b"      "/b/foo"                      *
1444   *  2   "a:"      "a:foo"             *
1445   *  2   "a:b"     "a:bad"       !
1446   *  2   "a:b"     "a:b/foo"                     *
1447   *  3   "a:/"     "a:/foo"            *
1448   *  3   "a:/b"    "a:/bad"      !
1449   *  3   "a:/b"    "a:/b/foo"                    *
1450   *  5   "//s/s"   "//s/s/foo"         *         *
1451   *  5   "//s/s/b" "//s/s/bad"   !
1452   *  5   "//s/s/b" "//s/s/b/foo"                 *
1453   */
1454
1455  if (child_dirent[len] == '/')
1456    /* "parent|child" is one of:
1457     * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1458    return child_dirent + len + 1;
1459
1460  if (root_len == len)
1461    /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1462    return child_dirent + len;
1463
1464  return NULL;
1465}
1466
1467const char *
1468svn_relpath_skip_ancestor(const char *parent_relpath,
1469                          const char *child_relpath)
1470{
1471  apr_size_t len = strlen(parent_relpath);
1472
1473  assert(relpath_is_canonical(parent_relpath));
1474  assert(relpath_is_canonical(child_relpath));
1475
1476  if (len == 0)
1477    return child_relpath;
1478
1479  if (0 != strncmp(parent_relpath, child_relpath, len))
1480    return NULL; /* parent_relpath is no ancestor of child_relpath */
1481
1482  if (child_relpath[len] == 0)
1483    return ""; /* parent_relpath == child_relpath */
1484
1485  if (child_relpath[len] == '/')
1486    return child_relpath + len + 1;
1487
1488  return NULL;
1489}
1490
1491
1492/* */
1493static const char *
1494uri_skip_ancestor(const char *parent_uri,
1495                  const char *child_uri)
1496{
1497  apr_size_t len = strlen(parent_uri);
1498
1499  assert(svn_uri_is_canonical(parent_uri, NULL));
1500  assert(svn_uri_is_canonical(child_uri, NULL));
1501
1502  if (0 != strncmp(parent_uri, child_uri, len))
1503    return NULL; /* parent_uri is no ancestor of child_uri */
1504
1505  if (child_uri[len] == 0)
1506    return ""; /* parent_uri == child_uri */
1507
1508  if (child_uri[len] == '/')
1509    return child_uri + len + 1;
1510
1511  return NULL;
1512}
1513
1514const char *
1515svn_uri_skip_ancestor(const char *parent_uri,
1516                      const char *child_uri,
1517                      apr_pool_t *result_pool)
1518{
1519  const char *result = uri_skip_ancestor(parent_uri, child_uri);
1520
1521  return result ? svn_path_uri_decode(result, result_pool) : NULL;
1522}
1523
1524svn_boolean_t
1525svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1526{
1527  return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1528}
1529
1530svn_boolean_t
1531svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1532{
1533  return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1534}
1535
1536
1537svn_boolean_t
1538svn_dirent_is_absolute(const char *dirent)
1539{
1540  if (! dirent)
1541    return FALSE;
1542
1543  /* dirent is absolute if it starts with '/' on non-Windows platforms
1544     or with '//' on Windows platforms */
1545  if (dirent[0] == '/'
1546#ifdef SVN_USE_DOS_PATHS
1547      && dirent[1] == '/' /* Single '/' depends on current drive */
1548#endif
1549      )
1550    return TRUE;
1551
1552  /* On Windows, dirent is also absolute when it starts with 'H:/'
1553     where 'H' is any letter. */
1554#ifdef SVN_USE_DOS_PATHS
1555  if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1556      (dirent[1] == ':') && (dirent[2] == '/'))
1557     return TRUE;
1558#endif /* SVN_USE_DOS_PATHS */
1559
1560  return FALSE;
1561}
1562
1563svn_error_t *
1564svn_dirent_get_absolute(const char **pabsolute,
1565                        const char *relative,
1566                        apr_pool_t *pool)
1567{
1568  char *buffer;
1569  apr_status_t apr_err;
1570  const char *path_apr;
1571
1572  SVN_ERR_ASSERT(! svn_path_is_url(relative));
1573
1574  /* Merge the current working directory with the relative dirent. */
1575  SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1576
1577  apr_err = apr_filepath_merge(&buffer, NULL,
1578                               path_apr,
1579                               APR_FILEPATH_NOTRELATIVE,
1580                               pool);
1581  if (apr_err)
1582    {
1583      /* In some cases when the passed path or its ancestor(s) do not exist
1584         or no longer exist apr returns an error.
1585
1586         In many of these cases we would like to return a path anyway, when the
1587         passed path was already a safe absolute path. So check for that now to
1588         avoid an error.
1589
1590         svn_dirent_is_absolute() doesn't perform the necessary checks to see
1591         if the path doesn't need post processing to be in the canonical absolute
1592         format.
1593         */
1594
1595      if (svn_dirent_is_absolute(relative)
1596          && svn_dirent_is_canonical(relative, pool)
1597          && !svn_path_is_backpath_present(relative))
1598        {
1599          *pabsolute = apr_pstrdup(pool, relative);
1600          return SVN_NO_ERROR;
1601        }
1602
1603      return svn_error_createf(SVN_ERR_BAD_FILENAME,
1604                               svn_error_create(apr_err, NULL, NULL),
1605                               _("Couldn't determine absolute path of '%s'"),
1606                               svn_dirent_local_style(relative, pool));
1607    }
1608
1609  SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1610  *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1611  return SVN_NO_ERROR;
1612}
1613
1614const char *
1615svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1616{
1617  return canonicalize(type_uri, uri, pool);
1618}
1619
1620const char *
1621svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1622{
1623  return canonicalize(type_relpath, relpath, pool);
1624}
1625
1626const char *
1627svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1628{
1629  const char *dst = canonicalize(type_dirent, dirent, pool);
1630
1631#ifdef SVN_USE_DOS_PATHS
1632  /* Handle a specific case on Windows where path == "X:/". Here we have to
1633     append the final '/', as svn_path_canonicalize will chop this of. */
1634  if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1635        (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1636        dirent[1] == ':' && dirent[2] == '/' &&
1637        dst[3] == '\0')
1638    {
1639      char *dst_slash = apr_pcalloc(pool, 4);
1640      dst_slash[0] = canonicalize_to_upper(dirent[0]);
1641      dst_slash[1] = ':';
1642      dst_slash[2] = '/';
1643      dst_slash[3] = '\0';
1644
1645      return dst_slash;
1646    }
1647#endif /* SVN_USE_DOS_PATHS */
1648
1649  return dst;
1650}
1651
1652svn_boolean_t
1653svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1654{
1655  const char *ptr = dirent;
1656  if (*ptr == '/')
1657    {
1658      ptr++;
1659#ifdef SVN_USE_DOS_PATHS
1660      /* Check for UNC paths */
1661      if (*ptr == '/')
1662        {
1663          /* TODO: Scan hostname and sharename and fall back to part code */
1664
1665          /* ### Fall back to old implementation */
1666          return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1667                  == 0);
1668        }
1669#endif /* SVN_USE_DOS_PATHS */
1670    }
1671#ifdef SVN_USE_DOS_PATHS
1672  else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1673           (ptr[1] == ':'))
1674    {
1675      /* The only canonical drive names are "A:"..."Z:", no lower case */
1676      if (*ptr < 'A' || *ptr > 'Z')
1677        return FALSE;
1678
1679      ptr += 2;
1680
1681      if (*ptr == '/')
1682        ptr++;
1683    }
1684#endif /* SVN_USE_DOS_PATHS */
1685
1686  return relpath_is_canonical(ptr);
1687}
1688
1689static svn_boolean_t
1690relpath_is_canonical(const char *relpath)
1691{
1692  const char *ptr = relpath, *seg = relpath;
1693
1694  /* RELPATH is canonical if it has:
1695   *  - no '.' segments
1696   *  - no start and closing '/'
1697   *  - no '//'
1698   */
1699
1700  if (*relpath == '\0')
1701    return TRUE;
1702
1703  if (*ptr == '/')
1704    return FALSE;
1705
1706  /* Now validate the rest of the path. */
1707  while(1)
1708    {
1709      apr_size_t seglen = ptr - seg;
1710
1711      if (seglen == 1 && *seg == '.')
1712        return FALSE;  /*  /./   */
1713
1714      if (*ptr == '/' && *(ptr+1) == '/')
1715        return FALSE;  /*  //    */
1716
1717      if (! *ptr && *(ptr - 1) == '/')
1718        return FALSE;  /* foo/  */
1719
1720      if (! *ptr)
1721        break;
1722
1723      if (*ptr == '/')
1724        ptr++;
1725      seg = ptr;
1726
1727      while (*ptr && (*ptr != '/'))
1728        ptr++;
1729    }
1730
1731  return TRUE;
1732}
1733
1734svn_boolean_t
1735svn_relpath_is_canonical(const char *relpath)
1736{
1737  return relpath_is_canonical(relpath);
1738}
1739
1740svn_boolean_t
1741svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1742{
1743  const char *ptr = uri, *seg = uri;
1744  const char *schema_data = NULL;
1745
1746  /* URI is canonical if it has:
1747   *  - lowercase URL scheme
1748   *  - lowercase URL hostname
1749   *  - no '.' segments
1750   *  - no closing '/'
1751   *  - no '//'
1752   *  - uppercase hex-encoded pair digits ("%AB", not "%ab")
1753   */
1754
1755  if (*uri == '\0')
1756    return FALSE;
1757
1758  if (! svn_path_is_url(uri))
1759    return FALSE;
1760
1761  /* Skip the scheme. */
1762  while (*ptr && (*ptr != '/') && (*ptr != ':'))
1763    ptr++;
1764
1765  /* No scheme?  No good. */
1766  if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1767    return FALSE;
1768
1769  /* Found a scheme, check that it's all lowercase. */
1770  ptr = uri;
1771  while (*ptr != ':')
1772    {
1773      if (*ptr >= 'A' && *ptr <= 'Z')
1774        return FALSE;
1775      ptr++;
1776    }
1777  /* Skip :// */
1778  ptr += 3;
1779
1780  /* Scheme only?  That works. */
1781  if (! *ptr)
1782    return TRUE;
1783
1784  /* This might be the hostname */
1785  seg = ptr;
1786  while (*ptr && (*ptr != '/') && (*ptr != '@'))
1787    ptr++;
1788
1789  if (*ptr == '@')
1790    seg = ptr + 1;
1791
1792  /* Found a hostname, check that it's all lowercase. */
1793  ptr = seg;
1794
1795  if (*ptr == '[')
1796    {
1797      ptr++;
1798      while (*ptr == ':'
1799             || (*ptr >= '0' && *ptr <= '9')
1800             || (*ptr >= 'a' && *ptr <= 'f'))
1801        {
1802          ptr++;
1803        }
1804
1805      if (*ptr != ']')
1806        return FALSE;
1807      ptr++;
1808    }
1809  else
1810    while (*ptr && *ptr != '/' && *ptr != ':')
1811      {
1812        if (*ptr >= 'A' && *ptr <= 'Z')
1813          return FALSE;
1814        ptr++;
1815      }
1816
1817  /* Found a portnumber */
1818  if (*ptr == ':')
1819    {
1820      apr_int64_t port = 0;
1821
1822      ptr++;
1823      schema_data = ptr;
1824
1825      while (*ptr >= '0' && *ptr <= '9')
1826        {
1827          port = 10 * port + (*ptr - '0');
1828          ptr++;
1829        }
1830
1831      if (ptr == schema_data)
1832        return FALSE; /* Fail on "http://host:" */
1833
1834      if (*ptr && *ptr != '/')
1835        return FALSE; /* Not a port number */
1836
1837      if (port == 80 && strncmp(uri, "http:", 5) == 0)
1838        return FALSE;
1839      else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1840        return FALSE;
1841      else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1842        return FALSE;
1843    }
1844
1845  schema_data = ptr;
1846
1847#ifdef SVN_USE_DOS_PATHS
1848  if (schema_data && *ptr == '/')
1849    {
1850      /* If this is a file url, ptr now points to the third '/' in
1851         file:///C:/path. Check that if we have such a URL the drive
1852         letter is in uppercase. */
1853      if (strncmp(uri, "file:", 5) == 0 &&
1854          ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1855          *(ptr+2) == ':')
1856        return FALSE;
1857    }
1858#endif /* SVN_USE_DOS_PATHS */
1859
1860  /* Now validate the rest of the URI. */
1861  seg = ptr;
1862  while (*ptr && (*ptr != '/'))
1863    ptr++;
1864  while(1)
1865    {
1866      apr_size_t seglen = ptr - seg;
1867
1868      if (seglen == 1 && *seg == '.')
1869        return FALSE;  /*  /./   */
1870
1871      if (*ptr == '/' && *(ptr+1) == '/')
1872        return FALSE;  /*  //    */
1873
1874      if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1875        return FALSE;  /* foo/  */
1876
1877      if (! *ptr)
1878        break;
1879
1880      if (*ptr == '/')
1881        ptr++;
1882
1883      seg = ptr;
1884      while (*ptr && (*ptr != '/'))
1885        ptr++;
1886    }
1887
1888  ptr = schema_data;
1889
1890  while (*ptr)
1891    {
1892      if (*ptr == '%')
1893        {
1894          char digitz[3];
1895          int val;
1896
1897          /* Can't usesvn_ctype_isxdigit() because lower case letters are
1898             not in our canonical format */
1899          if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1900              && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1901            return FALSE;
1902          else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1903                   && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1904            return FALSE;
1905
1906          digitz[0] = *(++ptr);
1907          digitz[1] = *(++ptr);
1908          digitz[2] = '\0';
1909          val = (int)strtol(digitz, NULL, 16);
1910
1911          if (svn_uri__char_validity[val])
1912            return FALSE; /* Should not have been escaped */
1913        }
1914      else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1915        return FALSE; /* Character should have been escaped */
1916      ptr++;
1917    }
1918
1919  return TRUE;
1920}
1921
1922svn_error_t *
1923svn_dirent_condense_targets(const char **pcommon,
1924                            apr_array_header_t **pcondensed_targets,
1925                            const apr_array_header_t *targets,
1926                            svn_boolean_t remove_redundancies,
1927                            apr_pool_t *result_pool,
1928                            apr_pool_t *scratch_pool)
1929{
1930  int i, num_condensed = targets->nelts;
1931  svn_boolean_t *removed;
1932  apr_array_header_t *abs_targets;
1933
1934  /* Early exit when there's no data to work on. */
1935  if (targets->nelts <= 0)
1936    {
1937      *pcommon = NULL;
1938      if (pcondensed_targets)
1939        *pcondensed_targets = NULL;
1940      return SVN_NO_ERROR;
1941    }
1942
1943  /* Get the absolute path of the first target. */
1944  SVN_ERR(svn_dirent_get_absolute(pcommon,
1945                                  APR_ARRAY_IDX(targets, 0, const char *),
1946                                  scratch_pool));
1947
1948  /* Early exit when there's only one dirent to work on. */
1949  if (targets->nelts == 1)
1950    {
1951      *pcommon = apr_pstrdup(result_pool, *pcommon);
1952      if (pcondensed_targets)
1953        *pcondensed_targets = apr_array_make(result_pool, 0,
1954                                             sizeof(const char *));
1955      return SVN_NO_ERROR;
1956    }
1957
1958  /* Copy the targets array, but with absolute dirents instead of
1959     relative.  Also, find the pcommon argument by finding what is
1960     common in all of the absolute dirents. NOTE: This is not as
1961     efficient as it could be.  The calculation of the basedir could
1962     be done in the loop below, which would save some calls to
1963     svn_dirent_get_longest_ancestor.  I decided to do it this way
1964     because I thought it would be simpler, since this way, we don't
1965     even do the loop if we don't need to condense the targets. */
1966
1967  removed = apr_pcalloc(scratch_pool, (targets->nelts *
1968                                          sizeof(svn_boolean_t)));
1969  abs_targets = apr_array_make(scratch_pool, targets->nelts,
1970                               sizeof(const char *));
1971
1972  APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
1973
1974  for (i = 1; i < targets->nelts; ++i)
1975    {
1976      const char *rel = APR_ARRAY_IDX(targets, i, const char *);
1977      const char *absolute;
1978      SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
1979      APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
1980      *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
1981                                                 scratch_pool);
1982    }
1983
1984  *pcommon = apr_pstrdup(result_pool, *pcommon);
1985
1986  if (pcondensed_targets != NULL)
1987    {
1988      size_t basedir_len;
1989
1990      if (remove_redundancies)
1991        {
1992          /* Find the common part of each pair of targets.  If
1993             common part is equal to one of the dirents, the other
1994             is a child of it, and can be removed.  If a target is
1995             equal to *pcommon, it can also be removed. */
1996
1997          /* First pass: when one non-removed target is a child of
1998             another non-removed target, remove the child. */
1999          for (i = 0; i < abs_targets->nelts; ++i)
2000            {
2001              int j;
2002
2003              if (removed[i])
2004                continue;
2005
2006              for (j = i + 1; j < abs_targets->nelts; ++j)
2007                {
2008                  const char *abs_targets_i;
2009                  const char *abs_targets_j;
2010                  const char *ancestor;
2011
2012                  if (removed[j])
2013                    continue;
2014
2015                  abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2016                  abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2017
2018                  ancestor = svn_dirent_get_longest_ancestor
2019                    (abs_targets_i, abs_targets_j, scratch_pool);
2020
2021                  if (*ancestor == '\0')
2022                    continue;
2023
2024                  if (strcmp(ancestor, abs_targets_i) == 0)
2025                    {
2026                      removed[j] = TRUE;
2027                      num_condensed--;
2028                    }
2029                  else if (strcmp(ancestor, abs_targets_j) == 0)
2030                    {
2031                      removed[i] = TRUE;
2032                      num_condensed--;
2033                    }
2034                }
2035            }
2036
2037          /* Second pass: when a target is the same as *pcommon,
2038             remove the target. */
2039          for (i = 0; i < abs_targets->nelts; ++i)
2040            {
2041              const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2042                                                        const char *);
2043
2044              if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2045                {
2046                  removed[i] = TRUE;
2047                  num_condensed--;
2048                }
2049            }
2050        }
2051
2052      /* Now create the return array, and copy the non-removed items */
2053      basedir_len = strlen(*pcommon);
2054      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2055                                           sizeof(const char *));
2056
2057      for (i = 0; i < abs_targets->nelts; ++i)
2058        {
2059          const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2060
2061          /* Skip this if it's been removed. */
2062          if (removed[i])
2063            continue;
2064
2065          /* If a common prefix was found, condensed_targets are given
2066             relative to that prefix.  */
2067          if (basedir_len > 0)
2068            {
2069              /* Only advance our pointer past a dirent separator if
2070                 REL_ITEM isn't the same as *PCOMMON.
2071
2072                 If *PCOMMON is a root dirent, basedir_len will already
2073                 include the closing '/', so never advance the pointer
2074                 here.
2075                 */
2076              rel_item += basedir_len;
2077              if (rel_item[0] &&
2078                  ! svn_dirent_is_root(*pcommon, basedir_len))
2079                rel_item++;
2080            }
2081
2082          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2083            = apr_pstrdup(result_pool, rel_item);
2084        }
2085    }
2086
2087  return SVN_NO_ERROR;
2088}
2089
2090svn_error_t *
2091svn_uri_condense_targets(const char **pcommon,
2092                         apr_array_header_t **pcondensed_targets,
2093                         const apr_array_header_t *targets,
2094                         svn_boolean_t remove_redundancies,
2095                         apr_pool_t *result_pool,
2096                         apr_pool_t *scratch_pool)
2097{
2098  int i, num_condensed = targets->nelts;
2099  apr_array_header_t *uri_targets;
2100  svn_boolean_t *removed;
2101
2102  /* Early exit when there's no data to work on. */
2103  if (targets->nelts <= 0)
2104    {
2105      *pcommon = NULL;
2106      if (pcondensed_targets)
2107        *pcondensed_targets = NULL;
2108      return SVN_NO_ERROR;
2109    }
2110
2111  *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2112                                  scratch_pool);
2113
2114  /* Early exit when there's only one uri to work on. */
2115  if (targets->nelts == 1)
2116    {
2117      *pcommon = apr_pstrdup(result_pool, *pcommon);
2118      if (pcondensed_targets)
2119        *pcondensed_targets = apr_array_make(result_pool, 0,
2120                                             sizeof(const char *));
2121      return SVN_NO_ERROR;
2122    }
2123
2124  /* Find the pcommon argument by finding what is common in all of the
2125     uris. NOTE: This is not as efficient as it could be.  The calculation
2126     of the basedir could be done in the loop below, which would
2127     save some calls to svn_uri_get_longest_ancestor.  I decided to do it
2128     this way because I thought it would be simpler, since this way, we don't
2129     even do the loop if we don't need to condense the targets. */
2130
2131  removed = apr_pcalloc(scratch_pool, (targets->nelts *
2132                                          sizeof(svn_boolean_t)));
2133  uri_targets = apr_array_make(scratch_pool, targets->nelts,
2134                               sizeof(const char *));
2135
2136  APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2137
2138  for (i = 1; i < targets->nelts; ++i)
2139    {
2140      const char *uri = svn_uri_canonicalize(
2141                           APR_ARRAY_IDX(targets, i, const char *),
2142                           scratch_pool);
2143      APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2144
2145      /* If the commonmost ancestor so far is empty, there's no point
2146         in continuing to search for a common ancestor at all.  But
2147         we'll keep looping for the sake of canonicalizing the
2148         targets, I suppose.  */
2149      if (**pcommon != '\0')
2150        *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2151                                                scratch_pool);
2152    }
2153
2154  *pcommon = apr_pstrdup(result_pool, *pcommon);
2155
2156  if (pcondensed_targets != NULL)
2157    {
2158      size_t basedir_len;
2159
2160      if (remove_redundancies)
2161        {
2162          /* Find the common part of each pair of targets.  If
2163             common part is equal to one of the dirents, the other
2164             is a child of it, and can be removed.  If a target is
2165             equal to *pcommon, it can also be removed. */
2166
2167          /* First pass: when one non-removed target is a child of
2168             another non-removed target, remove the child. */
2169          for (i = 0; i < uri_targets->nelts; ++i)
2170            {
2171              int j;
2172
2173              if (removed[i])
2174                continue;
2175
2176              for (j = i + 1; j < uri_targets->nelts; ++j)
2177                {
2178                  const char *uri_i;
2179                  const char *uri_j;
2180                  const char *ancestor;
2181
2182                  if (removed[j])
2183                    continue;
2184
2185                  uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2186                  uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2187
2188                  ancestor = svn_uri_get_longest_ancestor(uri_i,
2189                                                          uri_j,
2190                                                          scratch_pool);
2191
2192                  if (*ancestor == '\0')
2193                    continue;
2194
2195                  if (strcmp(ancestor, uri_i) == 0)
2196                    {
2197                      removed[j] = TRUE;
2198                      num_condensed--;
2199                    }
2200                  else if (strcmp(ancestor, uri_j) == 0)
2201                    {
2202                      removed[i] = TRUE;
2203                      num_condensed--;
2204                    }
2205                }
2206            }
2207
2208          /* Second pass: when a target is the same as *pcommon,
2209             remove the target. */
2210          for (i = 0; i < uri_targets->nelts; ++i)
2211            {
2212              const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2213                                                        const char *);
2214
2215              if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2216                {
2217                  removed[i] = TRUE;
2218                  num_condensed--;
2219                }
2220            }
2221        }
2222
2223      /* Now create the return array, and copy the non-removed items */
2224      basedir_len = strlen(*pcommon);
2225      *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2226                                           sizeof(const char *));
2227
2228      for (i = 0; i < uri_targets->nelts; ++i)
2229        {
2230          const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2231
2232          /* Skip this if it's been removed. */
2233          if (removed[i])
2234            continue;
2235
2236          /* If a common prefix was found, condensed_targets are given
2237             relative to that prefix.  */
2238          if (basedir_len > 0)
2239            {
2240              /* Only advance our pointer past a dirent separator if
2241                 REL_ITEM isn't the same as *PCOMMON.
2242
2243                 If *PCOMMON is a root dirent, basedir_len will already
2244                 include the closing '/', so never advance the pointer
2245                 here.
2246                 */
2247              rel_item += basedir_len;
2248              if ((rel_item[0] == '/') ||
2249                  (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2250                {
2251                  rel_item++;
2252                }
2253            }
2254
2255          APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2256            = svn_path_uri_decode(rel_item, result_pool);
2257        }
2258    }
2259
2260  return SVN_NO_ERROR;
2261}
2262
2263svn_error_t *
2264svn_dirent_is_under_root(svn_boolean_t *under_root,
2265                         const char **result_path,
2266                         const char *base_path,
2267                         const char *path,
2268                         apr_pool_t *result_pool)
2269{
2270  apr_status_t status;
2271  char *full_path;
2272
2273  *under_root = FALSE;
2274  if (result_path)
2275    *result_path = NULL;
2276
2277  status = apr_filepath_merge(&full_path,
2278                              base_path,
2279                              path,
2280                              APR_FILEPATH_NOTABOVEROOT
2281                              | APR_FILEPATH_SECUREROOTTEST,
2282                              result_pool);
2283
2284  if (status == APR_SUCCESS)
2285    {
2286      if (result_path)
2287        *result_path = svn_dirent_canonicalize(full_path, result_pool);
2288      *under_root = TRUE;
2289      return SVN_NO_ERROR;
2290    }
2291  else if (status == APR_EABOVEROOT)
2292    {
2293      *under_root = FALSE;
2294      return SVN_NO_ERROR;
2295    }
2296
2297  return svn_error_wrap_apr(status, NULL);
2298}
2299
2300svn_error_t *
2301svn_uri_get_dirent_from_file_url(const char **dirent,
2302                                 const char *url,
2303                                 apr_pool_t *pool)
2304{
2305  const char *hostname, *path;
2306
2307  SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2308
2309  /* Verify that the URL is well-formed (loosely) */
2310
2311  /* First, check for the "file://" prefix. */
2312  if (strncmp(url, "file://", 7) != 0)
2313    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2314                             _("Local URL '%s' does not contain 'file://' "
2315                               "prefix"), url);
2316
2317  /* Find the HOSTNAME portion and the PATH portion of the URL.  The host
2318     name is between the "file://" prefix and the next occurence of '/'.  We
2319     are considering everything from that '/' until the end of the URL to be
2320     the absolute path portion of the URL.
2321     If we got just "file://", treat it the same as "file:///". */
2322  hostname = url + 7;
2323  path = strchr(hostname, '/');
2324  if (path)
2325    hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2326  else
2327    path = "/";
2328
2329  /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2330  if (*hostname == '\0')
2331    hostname = NULL;
2332  else
2333    {
2334      hostname = svn_path_uri_decode(hostname, pool);
2335      if (strcmp(hostname, "localhost") == 0)
2336        hostname = NULL;
2337    }
2338
2339  /* Duplicate the URL, starting at the top of the path.
2340     At the same time, we URI-decode the path. */
2341#ifdef SVN_USE_DOS_PATHS
2342  /* On Windows, we'll typically have to skip the leading / if the
2343     path starts with a drive letter.  Like most Web browsers, We
2344     support two variants of this scheme:
2345
2346         file:///X:/path    and
2347         file:///X|/path
2348
2349    Note that, at least on WinNT and above,  file:////./X:/path  will
2350    also work, so we must make sure the transformation doesn't break
2351    that, and  file:///path  (that looks within the current drive
2352    only) should also keep working.
2353    If we got a non-empty hostname other than localhost, we convert this
2354    into an UNC path.  In this case, we obviously don't strip the slash
2355    even if the path looks like it starts with a drive letter.
2356  */
2357  {
2358    static const char valid_drive_letters[] =
2359      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2360    /* Casting away const! */
2361    char *dup_path = (char *)svn_path_uri_decode(path, pool);
2362
2363    /* This check assumes ':' and '|' are already decoded! */
2364    if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2365        && (dup_path[2] == ':' || dup_path[2] == '|'))
2366      {
2367        /* Skip the leading slash. */
2368        ++dup_path;
2369
2370        if (dup_path[1] == '|')
2371          dup_path[1] = ':';
2372
2373        if (dup_path[2] == '/' || dup_path[2] == '\0')
2374          {
2375            if (dup_path[2] == '\0')
2376              {
2377                /* A valid dirent for the driveroot must be like "C:/" instead of
2378                   just "C:" or svn_dirent_join() will use the current directory
2379                   on the drive instead */
2380                char *new_path = apr_pcalloc(pool, 4);
2381                new_path[0] = dup_path[0];
2382                new_path[1] = ':';
2383                new_path[2] = '/';
2384                new_path[3] = '\0';
2385                dup_path = new_path;
2386              }
2387          }
2388      }
2389    if (hostname)
2390      {
2391        if (dup_path[0] == '/' && dup_path[1] == '\0')
2392          return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2393                                   _("Local URL '%s' contains only a hostname, "
2394                                     "no path"), url);
2395
2396        /* We still know that the path starts with a slash. */
2397        *dirent = apr_pstrcat(pool, "//", hostname, dup_path, NULL);
2398      }
2399    else
2400      *dirent = dup_path;
2401  }
2402#else /* !SVN_USE_DOS_PATHS */
2403  /* Currently, the only hostnames we are allowing on non-Win32 platforms
2404     are the empty string and 'localhost'. */
2405  if (hostname)
2406    return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2407                             _("Local URL '%s' contains unsupported hostname"),
2408                             url);
2409
2410  *dirent = svn_path_uri_decode(path, pool);
2411#endif /* SVN_USE_DOS_PATHS */
2412  return SVN_NO_ERROR;
2413}
2414
2415svn_error_t *
2416svn_uri_get_file_url_from_dirent(const char **url,
2417                                 const char *dirent,
2418                                 apr_pool_t *pool)
2419{
2420  assert(svn_dirent_is_canonical(dirent, pool));
2421
2422  SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2423
2424  dirent = svn_path_uri_encode(dirent, pool);
2425
2426#ifndef SVN_USE_DOS_PATHS
2427  if (dirent[0] == '/' && dirent[1] == '\0')
2428    dirent = NULL; /* "file://" is the canonical form of "file:///" */
2429
2430  *url = apr_pstrcat(pool, "file://", dirent, (char *)NULL);
2431#else
2432  if (dirent[0] == '/')
2433    {
2434      /* Handle UNC paths //server/share -> file://server/share */
2435      assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2436
2437      *url = apr_pstrcat(pool, "file:", dirent, NULL);
2438    }
2439  else
2440    {
2441      char *uri = apr_pstrcat(pool, "file:///", dirent, NULL);
2442      apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2443
2444      /* "C:/" is a canonical dirent on Windows,
2445         but "file:///C:/" is not a canonical uri */
2446      if (uri[len-1] == '/')
2447        uri[len-1] = '\0';
2448
2449      *url = uri;
2450    }
2451#endif
2452
2453  return SVN_NO_ERROR;
2454}
2455
2456
2457
2458/* -------------- The fspath API (see private/svn_fspath.h) -------------- */
2459
2460svn_boolean_t
2461svn_fspath__is_canonical(const char *fspath)
2462{
2463  return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
2464}
2465
2466
2467const char *
2468svn_fspath__canonicalize(const char *fspath,
2469                         apr_pool_t *pool)
2470{
2471  if ((fspath[0] == '/') && (fspath[1] == '\0'))
2472    return "/";
2473
2474  return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
2475                     (char *)NULL);
2476}
2477
2478
2479svn_boolean_t
2480svn_fspath__is_root(const char *fspath, apr_size_t len)
2481{
2482  /* directory is root if it's equal to '/' */
2483  return (len == 1 && fspath[0] == '/');
2484}
2485
2486
/* Strip the leading '/' from both canonical fspaths and hand the
   remainders to svn_relpath_skip_ancestor(), returning its result
   unchanged.  Both arguments must be canonical fspaths (asserted). */
const char *
svn_fspath__skip_ancestor(const char *parent_fspath,
                          const char *child_fspath)
{
  const char *parent_relpath;
  const char *child_relpath;

  assert(svn_fspath__is_canonical(parent_fspath));
  assert(svn_fspath__is_canonical(child_fspath));

  /* Step over the leading slash to get the relpath form of each. */
  parent_relpath = parent_fspath + 1;
  child_relpath = child_fspath + 1;

  return svn_relpath_skip_ancestor(parent_relpath, child_relpath);
}
2496
2497
2498const char *
2499svn_fspath__dirname(const char *fspath,
2500                    apr_pool_t *pool)
2501{
2502  assert(svn_fspath__is_canonical(fspath));
2503
2504  if (fspath[0] == '/' && fspath[1] == '\0')
2505    return apr_pstrdup(pool, fspath);
2506  else
2507    return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
2508                       (char *)NULL);
2509}
2510
2511
2512const char *
2513svn_fspath__basename(const char *fspath,
2514                     apr_pool_t *pool)
2515{
2516  const char *result;
2517  assert(svn_fspath__is_canonical(fspath));
2518
2519  result = svn_relpath_basename(fspath + 1, pool);
2520
2521  assert(strchr(result, '/') == NULL);
2522  return result;
2523}
2524
2525void
2526svn_fspath__split(const char **dirpath,
2527                  const char **base_name,
2528                  const char *fspath,
2529                  apr_pool_t *result_pool)
2530{
2531  assert(dirpath != base_name);
2532
2533  if (dirpath)
2534    *dirpath = svn_fspath__dirname(fspath, result_pool);
2535
2536  if (base_name)
2537    *base_name = svn_fspath__basename(fspath, result_pool);
2538}
2539
2540char *
2541svn_fspath__join(const char *fspath,
2542                 const char *relpath,
2543                 apr_pool_t *result_pool)
2544{
2545  char *result;
2546  assert(svn_fspath__is_canonical(fspath));
2547  assert(svn_relpath_is_canonical(relpath));
2548
2549  if (relpath[0] == '\0')
2550    result = apr_pstrdup(result_pool, fspath);
2551  else if (fspath[1] == '\0')
2552    result = apr_pstrcat(result_pool, "/", relpath, (char *)NULL);
2553  else
2554    result = apr_pstrcat(result_pool, fspath, "/", relpath, (char *)NULL);
2555
2556  assert(svn_fspath__is_canonical(result));
2557  return result;
2558}
2559
2560char *
2561svn_fspath__get_longest_ancestor(const char *fspath1,
2562                                 const char *fspath2,
2563                                 apr_pool_t *result_pool)
2564{
2565  char *result;
2566  assert(svn_fspath__is_canonical(fspath1));
2567  assert(svn_fspath__is_canonical(fspath2));
2568
2569  result = apr_pstrcat(result_pool, "/",
2570                       svn_relpath_get_longest_ancestor(fspath1 + 1,
2571                                                        fspath2 + 1,
2572                                                        result_pool),
2573                       (char *)NULL);
2574
2575  assert(svn_fspath__is_canonical(result));
2576  return result;
2577}
2578
2579
2580
2581
2582/* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
2583
2584const char *
2585svn_urlpath__canonicalize(const char *uri,
2586                          apr_pool_t *pool)
2587{
2588  if (svn_path_is_url(uri))
2589    {
2590      uri = svn_uri_canonicalize(uri, pool);
2591    }
2592  else
2593    {
2594      uri = svn_fspath__canonicalize(uri, pool);
2595      /* Do a little dance to normalize hex encoding. */
2596      uri = svn_path_uri_decode(uri, pool);
2597      uri = svn_path_uri_encode(uri, pool);
2598    }
2599  return uri;
2600}
2601
2602
2603/* -------------- The cert API (see private/svn_cert.h) ------------- */
2604
2605svn_boolean_t
2606svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2607{
2608  apr_size_t pattern_pos = 0, hostname_pos = 0;
2609
2610  /* support leading wildcards that composed of the only character in the
2611   * left-most label. */
2612  if (pattern->len >= 2 &&
2613      pattern->data[pattern_pos] == '*' &&
2614      pattern->data[pattern_pos + 1] == '.')
2615    {
2616      while (hostname_pos < hostname->len &&
2617             hostname->data[hostname_pos] != '.')
2618        {
2619          hostname_pos++;
2620        }
2621      /* Assume that the wildcard must match something.  Rule 2 says
2622       * that *.example.com should not match example.com.  If the wildcard
2623       * ends up not matching anything then it matches .example.com which
2624       * seems to be essentially the same as just example.com */
2625      if (hostname_pos == 0)
2626        return FALSE;
2627
2628      pattern_pos++;
2629    }
2630
2631  while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2632    {
2633      char pattern_c = pattern->data[pattern_pos];
2634      char hostname_c = hostname->data[hostname_pos];
2635
2636      /* fold case as described in RFC 4343.
2637       * Note: We actually convert to lowercase, since our URI
2638       * canonicalization code converts to lowercase and generally
2639       * most certs are issued with lowercase DNS names, meaning
2640       * this avoids the fold operation in most cases.  The RFC
2641       * suggests the opposite transformation, but doesn't require
2642       * any specific implementation in any case.  It is critical
2643       * that this folding be locale independent so you can't use
2644       * tolower(). */
2645      pattern_c = canonicalize_to_lower(pattern_c);
2646      hostname_c = canonicalize_to_lower(hostname_c);
2647
2648      if (pattern_c != hostname_c)
2649        {
2650          /* doesn't match */
2651          return FALSE;
2652        }
2653      else
2654        {
2655          /* characters match so skip both */
2656          pattern_pos++;
2657          hostname_pos++;
2658        }
2659    }
2660
2661  /* ignore a trailing period on the hostname since this has no effect on the
2662   * security of the matching.  See the following for the long explanation as
2663   * to why:
2664   * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2665   */
2666  if (pattern_pos == pattern->len &&
2667      hostname_pos == hostname->len - 1 &&
2668      hostname->data[hostname_pos] == '.')
2669    hostname_pos++;
2670
2671  if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2672    {
2673      /* end didn't match */
2674      return FALSE;
2675    }
2676
2677  return TRUE;
2678}
2679