1/* dump.c --- writing filesystem contents into a portable 'dumpfile' format.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23
24#include <stdarg.h>
25
26#include "svn_private_config.h"
27#include "svn_pools.h"
28#include "svn_error.h"
29#include "svn_fs.h"
30#include "svn_hash.h"
31#include "svn_iter.h"
32#include "svn_repos.h"
33#include "svn_string.h"
34#include "svn_dirent_uri.h"
35#include "svn_path.h"
36#include "svn_time.h"
37#include "svn_checksum.h"
38#include "svn_props.h"
39#include "svn_sorts.h"
40
41#include "private/svn_repos_private.h"
42#include "private/svn_mergeinfo_private.h"
43#include "private/svn_fs_private.h"
44#include "private/svn_sorts_private.h"
45#include "private/svn_utf_private.h"
46#include "private/svn_cache.h"
47#include "private/svn_fspath.h"
48
/* Evaluates to true when P is a non-NULL path and R satisfies
   SVN_IS_VALID_REVNUM, i.e. when P@R can serve as a copy source. */
#define ARE_VALID_COPY_ARGS(p,r) ((p) && SVN_IS_VALID_REVNUM(r))
50
51/*----------------------------------------------------------------------*/
52
53
54/* To be able to check whether a path exists in the current revision
55   (as changes come in), we need to track the relevant tree changes.
56
57   In particular, we remember deletions, additions and copies including
58   their copy-from info.  Since the dump performs a pre-order tree walk,
59   we only need to store the data for the stack of parent folders.
60
61   The problem that we are trying to solve is that the dump receives
62   transforming operations whose validity depends on previous operations
63   in the same revision but cannot be checked against the final state
64   as stored in the repository as that is the state *after* we applied
65   the respective tree changes.
66
67   Note that the tracker functions don't perform any sanity or validity
68   checks.  Those higher-level tests have to be done in the calling code.
69   However, there is no way to corrupt the data structure using the
70   provided functions.
71 */
72
/* Single entry in the path tracker.  Only those levels of the path
   hierarchy that got changed by a tree modification need an instance
   of this struct.

   Please note that the path info in this struct is stored in re-usable
   stringbuf objects such that we don't need to allocate more memory than
   the longest path we encounter requires.
 */
typedef struct path_tracker_entry_t
{
  /* path in the current tree */
  svn_stringbuf_t *path;

  /* copy-from path (must be empty if COPYFROM_REV is SVN_INVALID_REVNUM) */
  svn_stringbuf_t *copyfrom_path;

  /* copy-from revision (SVN_INVALID_REVNUM for additions / replacements
     that don't copy history, i.e. with no sub-tree) */
  svn_revnum_t copyfrom_rev;

  /* if FALSE, PATH has been deleted */
  svn_boolean_t exists;
} path_tracker_entry_t;
96
/* Tracks all tree modifications above the current path.
 */
typedef struct path_tracker_t
{
  /* Container for all relevant tree changes in depth order.
     May contain more entries than DEPTH to allow for reusing memory.
     Only entries 0 .. DEPTH-1 are valid.
   */
  apr_array_header_t *stack;

  /* Number of relevant entries in STACK.  May be 0 */
  int depth;

  /* Revision that we currently track.  If DEPTH is 0, paths exist in
     REVISION exactly when they exist in REVISION-1.  This applies only
     to the current state of our tree walk.
   */
  svn_revnum_t revision;

  /* Allocate container entries here. */
  apr_pool_t *pool;
} path_tracker_t;
119
120/* Return a new path tracker object for REVISION, allocated in POOL.
121 */
122static path_tracker_t *
123tracker_create(svn_revnum_t revision,
124               apr_pool_t *pool)
125{
126  path_tracker_t *result = apr_pcalloc(pool, sizeof(*result));
127  result->stack = apr_array_make(pool, 16, sizeof(path_tracker_entry_t));
128  result->revision = revision;
129  result->pool = pool;
130
131  return result;
132}
133
134/* Remove all entries from TRACKER that are not relevant to PATH anymore.
135 * If ALLOW_EXACT_MATCH is FALSE, keep only entries that pertain to
136 * parent folders but not to PATH itself.
137 *
138 * This internal function implicitly updates the tracker state during the
139 * tree by removing "past" entries.  Other functions will add entries when
140 * we encounter a new tree change.
141 */
142static void
143tracker_trim(path_tracker_t *tracker,
144             const char *path,
145             svn_boolean_t allow_exact_match)
146{
147  /* remove everything that is unrelated to PATH.
148     Note that TRACKER->STACK is depth-ordered,
149     i.e. stack[N] is a (maybe indirect) parent of stack[N+1]
150     for N+1 < DEPTH.
151   */
152  for (; tracker->depth; --tracker->depth)
153    {
154      path_tracker_entry_t *parent = &APR_ARRAY_IDX(tracker->stack,
155                                                    tracker->depth - 1,
156                                                    path_tracker_entry_t);
157      const char *rel_path
158        = svn_dirent_skip_ancestor(parent->path->data, path);
159
160      /* always keep parents.  Keep exact matches when allowed. */
161      if (rel_path && (allow_exact_match || *rel_path != '\0'))
162        break;
163    }
164}
165
166/* Using TRACKER, check what path at what revision in the repository must
167   be checked to decide that whether PATH exists.  Return the info in
168   *ORIG_PATH and *ORIG_REV, respectively.
169
170   If the path is known to not exist, *ORIG_PATH will be NULL and *ORIG_REV
171   will be SVN_INVALID_REVNUM.  If *ORIG_REV is SVN_INVALID_REVNUM, PATH
172   has just been added in the revision currently being tracked.
173
174   Use POOL for allocations.  Note that *ORIG_PATH may be allocated in POOL,
175   a reference to internal data with the same lifetime as TRACKER or just
176   PATH.
177 */
178static void
179tracker_lookup(const char **orig_path,
180               svn_revnum_t *orig_rev,
181               path_tracker_t *tracker,
182               const char *path,
183               apr_pool_t *pool)
184{
185  tracker_trim(tracker, path, TRUE);
186  if (tracker->depth == 0)
187    {
188      /* no tree changes -> paths are the same as in the previous rev. */
189      *orig_path = path;
190      *orig_rev = tracker->revision - 1;
191    }
192  else
193    {
194      path_tracker_entry_t *parent = &APR_ARRAY_IDX(tracker->stack,
195                                                    tracker->depth - 1,
196                                                    path_tracker_entry_t);
197      if (parent->exists)
198        {
199          const char *rel_path
200            = svn_dirent_skip_ancestor(parent->path->data, path);
201
202          if (parent->copyfrom_rev != SVN_INVALID_REVNUM)
203            {
204              /* parent is a copy with history. Translate path. */
205              *orig_path = svn_dirent_join(parent->copyfrom_path->data,
206                                           rel_path, pool);
207              *orig_rev = parent->copyfrom_rev;
208            }
209          else if (*rel_path == '\0')
210            {
211              /* added in this revision with no history */
212              *orig_path = path;
213              *orig_rev = tracker->revision;
214            }
215          else
216            {
217              /* parent got added but not this path */
218              *orig_path = NULL;
219              *orig_rev = SVN_INVALID_REVNUM;
220            }
221        }
222      else
223        {
224          /* (maybe parent) path has been deleted */
225          *orig_path = NULL;
226          *orig_rev = SVN_INVALID_REVNUM;
227        }
228    }
229}
230
231/* Return a reference to the stack entry in TRACKER for PATH.  If no
232   suitable entry exists, add one.  Implicitly updates the tracked tree
233   location.
234
235   Only the PATH member of the result is being updated.  All other members
236   will have undefined values.
237 */
238static path_tracker_entry_t *
239tracker_add_entry(path_tracker_t *tracker,
240                  const char *path)
241{
242  path_tracker_entry_t *entry;
243  tracker_trim(tracker, path, FALSE);
244
245  if (tracker->depth == tracker->stack->nelts)
246    {
247      entry = apr_array_push(tracker->stack);
248      entry->path = svn_stringbuf_create_empty(tracker->pool);
249      entry->copyfrom_path = svn_stringbuf_create_empty(tracker->pool);
250    }
251  else
252    {
253      entry = &APR_ARRAY_IDX(tracker->stack, tracker->depth,
254                             path_tracker_entry_t);
255    }
256
257  svn_stringbuf_set(entry->path, path);
258  ++tracker->depth;
259
260  return entry;
261}
262
263/* Update the TRACKER with a copy from COPYFROM_PATH@COPYFROM_REV to
264   PATH in the tracked revision.
265 */
266static void
267tracker_path_copy(path_tracker_t *tracker,
268                  const char *path,
269                  const char *copyfrom_path,
270                  svn_revnum_t copyfrom_rev)
271{
272  path_tracker_entry_t *entry = tracker_add_entry(tracker, path);
273
274  svn_stringbuf_set(entry->copyfrom_path, copyfrom_path);
275  entry->copyfrom_rev = copyfrom_rev;
276  entry->exists = TRUE;
277}
278
279/* Update the TRACKER with a plain addition of PATH (without history).
280 */
281static void
282tracker_path_add(path_tracker_t *tracker,
283                 const char *path)
284{
285  path_tracker_entry_t *entry = tracker_add_entry(tracker, path);
286
287  svn_stringbuf_setempty(entry->copyfrom_path);
288  entry->copyfrom_rev = SVN_INVALID_REVNUM;
289  entry->exists = TRUE;
290}
291
292/* Update the TRACKER with a replacement of PATH with a plain addition
293   (without history).
294 */
295static void
296tracker_path_replace(path_tracker_t *tracker,
297                     const char *path)
298{
299  /* this will implicitly purge all previous sub-tree info from STACK.
300     Thus, no need to tack the deletion explicitly. */
301  tracker_path_add(tracker, path);
302}
303
304/* Update the TRACKER with a deletion of PATH.
305 */
306static void
307tracker_path_delete(path_tracker_t *tracker,
308                    const char *path)
309{
310  path_tracker_entry_t *entry = tracker_add_entry(tracker, path);
311
312  svn_stringbuf_setempty(entry->copyfrom_path);
313  entry->copyfrom_rev = SVN_INVALID_REVNUM;
314  entry->exists = FALSE;
315}
316
317
318/* Compute the delta between OLDROOT/OLDPATH and NEWROOT/NEWPATH and
319   store it into a new temporary file *TEMPFILE.  OLDROOT may be NULL,
320   in which case the delta will be computed against an empty file, as
321   per the svn_fs_get_file_delta_stream docstring.  Record the length
322   of the temporary file in *LEN, and rewind the file before
323   returning. */
324static svn_error_t *
325store_delta(apr_file_t **tempfile, svn_filesize_t *len,
326            svn_fs_root_t *oldroot, const char *oldpath,
327            svn_fs_root_t *newroot, const char *newpath, apr_pool_t *pool)
328{
329  svn_stream_t *temp_stream;
330  apr_off_t offset;
331  svn_txdelta_stream_t *delta_stream;
332  svn_txdelta_window_handler_t wh;
333  void *whb;
334
335  /* Create a temporary file and open a stream to it. Note that we need
336     the file handle in order to rewind it. */
337  SVN_ERR(svn_io_open_unique_file3(tempfile, NULL, NULL,
338                                   svn_io_file_del_on_pool_cleanup,
339                                   pool, pool));
340  temp_stream = svn_stream_from_aprfile2(*tempfile, TRUE, pool);
341
342  /* Compute the delta and send it to the temporary file. */
343  SVN_ERR(svn_fs_get_file_delta_stream(&delta_stream, oldroot, oldpath,
344                                       newroot, newpath, pool));
345  svn_txdelta_to_svndiff3(&wh, &whb, temp_stream, 0,
346                          SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
347  SVN_ERR(svn_txdelta_send_txstream(delta_stream, wh, whb, pool));
348
349  /* Get the length of the temporary file and rewind it. */
350  SVN_ERR(svn_io_file_get_offset(&offset, *tempfile, pool));
351  *len = offset;
352  offset = 0;
353  return svn_io_file_seek(*tempfile, APR_SET, &offset, pool);
354}
355
356
357/* Send a notification of type #svn_repos_notify_warning, subtype WARNING,
358   with message WARNING_FMT formatted with the remaining variable arguments.
359   Send it by calling NOTIFY_FUNC (if not null) with NOTIFY_BATON.
360 */
361__attribute__((format(printf, 5, 6)))
362static void
363notify_warning(apr_pool_t *scratch_pool,
364               svn_repos_notify_func_t notify_func,
365               void *notify_baton,
366               svn_repos_notify_warning_t warning,
367               const char *warning_fmt,
368               ...)
369{
370  va_list va;
371  svn_repos_notify_t *notify;
372
373  if (notify_func == NULL)
374    return;
375
376  notify = svn_repos_notify_create(svn_repos_notify_warning, scratch_pool);
377  notify->warning = warning;
378  va_start(va, warning_fmt);
379  notify->warning_str = apr_pvsprintf(scratch_pool, warning_fmt, va);
380  va_end(va);
381
382  notify_func(notify_baton, notify, scratch_pool);
383}
384
385
386/*----------------------------------------------------------------------*/
387
388/* Write to STREAM the header in HEADERS named KEY, if present.
389 */
390static svn_error_t *
391write_header(svn_stream_t *stream,
392             apr_hash_t *headers,
393             const char *key,
394             apr_pool_t *scratch_pool)
395{
396  const char *val = svn_hash_gets(headers, key);
397
398  if (val)
399    {
400      SVN_ERR(svn_stream_printf(stream, scratch_pool,
401                                "%s: %s\n", key, val));
402    }
403  return SVN_NO_ERROR;
404}
405
406/* Write headers, in arbitrary order.
407 * ### TODO: use a stable order
408 * ### Modifies HEADERS.
409 */
410static svn_error_t *
411write_revision_headers(svn_stream_t *stream,
412                       apr_hash_t *headers,
413                       apr_pool_t *scratch_pool)
414{
415  const char **h;
416  apr_hash_index_t *hi;
417
418  static const char *revision_headers_order[] =
419  {
420    SVN_REPOS_DUMPFILE_REVISION_NUMBER,  /* must be first */
421    NULL
422  };
423
424  /* Write some headers in a given order */
425  for (h = revision_headers_order; *h; h++)
426    {
427      SVN_ERR(write_header(stream, headers, *h, scratch_pool));
428      svn_hash_sets(headers, *h, NULL);
429    }
430
431  /* Write any and all remaining headers except Content-length.
432   * ### TODO: use a stable order
433   */
434  for (hi = apr_hash_first(scratch_pool, headers); hi; hi = apr_hash_next(hi))
435    {
436      const char *key = apr_hash_this_key(hi);
437
438      if (strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH) != 0)
439        SVN_ERR(write_header(stream, headers, key, scratch_pool));
440    }
441
442  /* Content-length must be last */
443  SVN_ERR(write_header(stream, headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH,
444                       scratch_pool));
445
446  return SVN_NO_ERROR;
447}
448
/* A header entry: the element type of the apr_array_header_t which is
 * the real type of svn_repos__dumpfile_headers_t.
 *
 * KEY is the header name and VAL its value; svn_repos__dump_headers()
 * writes each entry as a "KEY: VAL" line.
 */
typedef struct svn_repos__dumpfile_header_entry_t {
  const char *key, *val;
} svn_repos__dumpfile_header_entry_t;
455
456svn_repos__dumpfile_headers_t *
457svn_repos__dumpfile_headers_create(apr_pool_t *pool)
458{
459  svn_repos__dumpfile_headers_t *headers
460    = apr_array_make(pool, 5, sizeof(svn_repos__dumpfile_header_entry_t));
461
462  return headers;
463}
464
465void
466svn_repos__dumpfile_header_push(svn_repos__dumpfile_headers_t *headers,
467                                const char *key,
468                                const char *val)
469{
470  svn_repos__dumpfile_header_entry_t *h
471    = &APR_ARRAY_PUSH(headers, svn_repos__dumpfile_header_entry_t);
472
473  h->key = apr_pstrdup(headers->pool, key);
474  h->val = apr_pstrdup(headers->pool, val);
475}
476
477void
478svn_repos__dumpfile_header_pushf(svn_repos__dumpfile_headers_t *headers,
479                                 const char *key,
480                                 const char *val_fmt,
481                                 ...)
482{
483  va_list ap;
484  svn_repos__dumpfile_header_entry_t *h
485    = &APR_ARRAY_PUSH(headers, svn_repos__dumpfile_header_entry_t);
486
487  h->key = apr_pstrdup(headers->pool, key);
488  va_start(ap, val_fmt);
489  h->val = apr_pvsprintf(headers->pool, val_fmt, ap);
490  va_end(ap);
491}
492
493svn_error_t *
494svn_repos__dump_headers(svn_stream_t *stream,
495                        svn_repos__dumpfile_headers_t *headers,
496                        apr_pool_t *scratch_pool)
497{
498  int i;
499
500  for (i = 0; i < headers->nelts; i++)
501    {
502      svn_repos__dumpfile_header_entry_t *h
503        = &APR_ARRAY_IDX(headers, i, svn_repos__dumpfile_header_entry_t);
504
505      SVN_ERR(svn_stream_printf(stream, scratch_pool,
506                                "%s: %s\n", h->key, h->val));
507    }
508
509  /* End of headers */
510  SVN_ERR(svn_stream_puts(stream, "\n"));
511
512  return SVN_NO_ERROR;
513}
514
515svn_error_t *
516svn_repos__dump_magic_header_record(svn_stream_t *dump_stream,
517                                    int version,
518                                    apr_pool_t *pool)
519{
520  SVN_ERR(svn_stream_printf(dump_stream, pool,
521                            SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n",
522                            version));
523  return SVN_NO_ERROR;
524}
525
526svn_error_t *
527svn_repos__dump_uuid_header_record(svn_stream_t *dump_stream,
528                                   const char *uuid,
529                                   apr_pool_t *pool)
530{
531  if (uuid)
532    {
533      SVN_ERR(svn_stream_printf(dump_stream, pool, SVN_REPOS_DUMPFILE_UUID
534                                ": %s\n\n", uuid));
535    }
536  return SVN_NO_ERROR;
537}
538
539svn_error_t *
540svn_repos__dump_revision_record(svn_stream_t *dump_stream,
541                                svn_revnum_t revision,
542                                apr_hash_t *extra_headers,
543                                apr_hash_t *revprops,
544                                svn_boolean_t props_section_always,
545                                apr_pool_t *scratch_pool)
546{
547  svn_stringbuf_t *propstring = NULL;
548  apr_hash_t *headers;
549
550  if (extra_headers)
551    headers = apr_hash_copy(scratch_pool, extra_headers);
552  else
553    headers = apr_hash_make(scratch_pool);
554
555  /* ### someday write a revision-content-checksum */
556
557  svn_hash_sets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER,
558                apr_psprintf(scratch_pool, "%ld", revision));
559
560  if (apr_hash_count(revprops) || props_section_always)
561    {
562      svn_stream_t *propstream;
563
564      propstring = svn_stringbuf_create_empty(scratch_pool);
565      propstream = svn_stream_from_stringbuf(propstring, scratch_pool);
566      SVN_ERR(svn_hash_write2(revprops, propstream, "PROPS-END", scratch_pool));
567      SVN_ERR(svn_stream_close(propstream));
568
569      svn_hash_sets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH,
570                    apr_psprintf(scratch_pool,
571                                 "%" APR_SIZE_T_FMT, propstring->len));
572    }
573
574  if (propstring)
575    {
576      /* Write out a regular Content-length header for the benefit of
577         non-Subversion RFC-822 parsers. */
578      svn_hash_sets(headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH,
579                    apr_psprintf(scratch_pool,
580                                 "%" APR_SIZE_T_FMT, propstring->len));
581    }
582
583  SVN_ERR(write_revision_headers(dump_stream, headers, scratch_pool));
584
585  /* End of headers */
586  SVN_ERR(svn_stream_puts(dump_stream, "\n"));
587
588  /* Property data. */
589  if (propstring)
590    {
591      SVN_ERR(svn_stream_write(dump_stream, propstring->data, &propstring->len));
592    }
593
594  /* put an end to revision */
595  SVN_ERR(svn_stream_puts(dump_stream, "\n"));
596
597  return SVN_NO_ERROR;
598}
599
600svn_error_t *
601svn_repos__dump_node_record(svn_stream_t *dump_stream,
602                            svn_repos__dumpfile_headers_t *headers,
603                            svn_stringbuf_t *props_str,
604                            svn_boolean_t has_text,
605                            svn_filesize_t text_content_length,
606                            svn_boolean_t content_length_always,
607                            apr_pool_t *scratch_pool)
608{
609  svn_filesize_t content_length = 0;
610
611  /* add content-length headers */
612  if (props_str)
613    {
614      svn_repos__dumpfile_header_pushf(
615        headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH,
616        "%" APR_SIZE_T_FMT, props_str->len);
617      content_length += props_str->len;
618    }
619  if (has_text)
620    {
621      svn_repos__dumpfile_header_pushf(
622        headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH,
623        "%" SVN_FILESIZE_T_FMT, text_content_length);
624      content_length += text_content_length;
625    }
626  if (content_length_always || props_str || has_text)
627    {
628      svn_repos__dumpfile_header_pushf(
629        headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH,
630        "%" SVN_FILESIZE_T_FMT, content_length);
631    }
632
633  /* write the headers */
634  SVN_ERR(svn_repos__dump_headers(dump_stream, headers, scratch_pool));
635
636  /* write the props */
637  if (props_str)
638    {
639      SVN_ERR(svn_stream_write(dump_stream, props_str->data, &props_str->len));
640    }
641  return SVN_NO_ERROR;
642}
643
644/*----------------------------------------------------------------------*/
645
646/** An editor which dumps node-data in 'dumpfile format' to a file. **/
647
648/* Look, mom!  No file batons! */
649
/* Edit-wide state of the dump editor, shared by all directory batons
   (see the EDIT_BATON member of struct dir_baton). */
struct edit_baton
{
  /* The relpath which implicitly prepends all full paths coming into
     this editor.  This will almost always be "".  */
  const char *path;

  /* The stream to dump to. */
  svn_stream_t *stream;

  /* Send feedback here, if non-NULL */
  svn_repos_notify_func_t notify_func;
  void *notify_baton;

  /* The fs revision root, so we can read the contents of paths. */
  svn_fs_root_t *fs_root;
  svn_revnum_t current_rev;

  /* The fs, so we can grab historic information if needed. */
  svn_fs_t *fs;

  /* True if dumped nodes should output deltas instead of full text. */
  svn_boolean_t use_deltas;

  /* True if this "dump" is in fact a verify. */
  svn_boolean_t verify;

  /* True if checking UCS normalization during a verify. */
  svn_boolean_t check_normalization;

  /* The first revision dumped in this dumpstream. */
  svn_revnum_t oldest_dumped_rev;

  /* If not NULL, set to true if any references to revisions older than
     OLDEST_DUMPED_REV were found in the dumpstream. */
  svn_boolean_t *found_old_reference;

  /* If not NULL, set to true if any mergeinfo was dumped which contains
     revisions older than OLDEST_DUMPED_REV. */
  svn_boolean_t *found_old_mergeinfo;

  /* Structure allows us to verify the paths currently being dumped.
     If NULL, validity checks are being skipped (node_must_exist() and
     node_must_not_exist() return without checking anything). */
  path_tracker_t *path_tracker;
};
694
/* Per-directory state of the dump editor.  Instances are created by
   make_dir_baton(). */
struct dir_baton
{
  /* the edit-wide state shared by all directories of this edit */
  struct edit_baton *edit_baton;

  /* has this directory been written to the output stream? */
  svn_boolean_t written_out;

  /* the repository relpath associated with this directory */
  const char *path;

  /* The comparison repository relpath and revision of this directory.
     If both of these are valid, use them as a source against which to
     compare the directory instead of the default comparison source of
     PATH in the previous revision. */
  const char *cmp_path;
  svn_revnum_t cmp_rev;

  /* hash of paths that need to be deleted, though some -might- be
     replaced.  maps const char * paths to this dir_baton.  (they're
     full paths, because that's what the editor driver gives us.  but
     really, they're all within this directory.) */
  apr_hash_t *deleted_entries;

  /* A flag indicating that new entries have been added to this
     directory in this revision. Used to optimize detection of UCS
     representation collisions; we will only check for that in
     revisions where new names appear in the directory. */
  svn_boolean_t check_name_collision;

  /* pool to be used for deleting the hash items */
  apr_pool_t *pool;
};
727
728
729/* Make a directory baton to represent the directory was path
730   (relative to EDIT_BATON's path) is PATH.
731
732   CMP_PATH/CMP_REV are the path/revision against which this directory
733   should be compared for changes.  If either is omitted (NULL for the
734   path, SVN_INVALID_REVNUM for the rev), just compare this directory
735   PATH against itself in the previous revision.
736
737   PB is the directory baton of this directory's parent,
738   or NULL if this is the top-level directory of the edit.
739
740   Perform all allocations in POOL.  */
741static struct svn_error_t *
742make_dir_baton(struct dir_baton **dbp,
743               const char *path,
744               const char *cmp_path,
745               svn_revnum_t cmp_rev,
746               void *edit_baton,
747               struct dir_baton *pb,
748               apr_pool_t *pool)
749{
750  struct edit_baton *eb = edit_baton;
751  struct dir_baton *new_db = apr_pcalloc(pool, sizeof(*new_db));
752  const char *full_path, *canonicalized_path;
753
754  /* A path relative to nothing?  I don't think so. */
755  SVN_ERR_ASSERT(!path || pb);
756
757  /* Construct the full path of this node. */
758  if (pb)
759    full_path = svn_relpath_join(eb->path, path, pool);
760  else
761    full_path = apr_pstrdup(pool, eb->path);
762
763  /* Remove leading slashes from copyfrom paths. */
764  if (cmp_path)
765    {
766      SVN_ERR(svn_relpath_canonicalize_safe(&canonicalized_path, NULL,
767                                            cmp_path, pool, pool));
768      cmp_path = canonicalized_path;
769    }
770
771  new_db->edit_baton = eb;
772  new_db->path = full_path;
773  new_db->cmp_path = cmp_path;
774  new_db->cmp_rev = cmp_rev;
775  new_db->written_out = FALSE;
776  new_db->deleted_entries = apr_hash_make(pool);
777  new_db->check_name_collision = FALSE;
778  new_db->pool = pool;
779
780  *dbp = new_db;
781  return SVN_NO_ERROR;
782}
783
/* Forward declaration; the definition follows further down in this file.
   node_must_exist() and node_must_not_exist() call it with an edit baton
   as BATON to obtain in *KIND the node kind of PATH at BASE_REVISION. */
static svn_error_t *
fetch_kind_func(svn_node_kind_t *kind,
                void *baton,
                const char *path,
                svn_revnum_t base_revision,
                apr_pool_t *scratch_pool);
790
791/* Return an error when PATH in REVISION does not exist or is of a
792   different kind than EXPECTED_KIND.  If the latter is svn_node_unknown,
793   skip that check.  Use EB for context information.  If REVISION is the
794   current revision, use EB's path tracker to follow renames, deletions,
795   etc.
796
797   Use SCRATCH_POOL for temporary allocations.
798   No-op if EB's path tracker has not been initialized.
799 */
800static svn_error_t *
801node_must_exist(struct edit_baton *eb,
802                const char *path,
803                svn_revnum_t revision,
804                svn_node_kind_t expected_kind,
805                apr_pool_t *scratch_pool)
806{
807  svn_node_kind_t kind = svn_node_none;
808
809  /* in case the caller is trying something stupid ... */
810  if (eb->path_tracker == NULL)
811    return SVN_NO_ERROR;
812
813  /* paths pertaining to the revision currently being processed must
814     be translated / checked using our path tracker. */
815  if (revision == eb->path_tracker->revision)
816    tracker_lookup(&path, &revision, eb->path_tracker, path, scratch_pool);
817
818  /* determine the node type (default: no such node) */
819  if (path)
820    SVN_ERR(fetch_kind_func(&kind, eb, path, revision, scratch_pool));
821
822  /* check results */
823  if (kind == svn_node_none)
824    return svn_error_createf(SVN_ERR_FS_NOT_FOUND, NULL,
825                             _("Path '%s' not found in r%ld."),
826                             path, revision);
827
828  if (expected_kind != kind && expected_kind != svn_node_unknown)
829    return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
830                             _("Unexpected node kind %d for '%s' at r%ld. "
831                               "Expected kind was %d."),
832                             kind, path, revision, expected_kind);
833
834  return SVN_NO_ERROR;
835}
836
837/* Return an error when PATH exists in REVISION.  Use EB for context
838   information.  If REVISION is the current revision, use EB's path
839   tracker to follow renames, deletions, etc.
840
841   Use SCRATCH_POOL for temporary allocations.
842   No-op if EB's path tracker has not been initialized.
843 */
844static svn_error_t *
845node_must_not_exist(struct edit_baton *eb,
846                    const char *path,
847                    svn_revnum_t revision,
848                    apr_pool_t *scratch_pool)
849{
850  svn_node_kind_t kind = svn_node_none;
851
852  /* in case the caller is trying something stupid ... */
853  if (eb->path_tracker == NULL)
854    return SVN_NO_ERROR;
855
856  /* paths pertaining to the revision currently being processed must
857     be translated / checked using our path tracker. */
858  if (revision == eb->path_tracker->revision)
859    tracker_lookup(&path, &revision, eb->path_tracker, path, scratch_pool);
860
861  /* determine the node type (default: no such node) */
862  if (path)
863    SVN_ERR(fetch_kind_func(&kind, eb, path, revision, scratch_pool));
864
865  /* check results */
866  if (kind != svn_node_none)
867    return svn_error_createf(SVN_ERR_FS_ALREADY_EXISTS, NULL,
868                             _("Path '%s' exists in r%ld."),
869                             path, revision);
870
871  return SVN_NO_ERROR;
872}
873
874/* If the mergeinfo in MERGEINFO_STR refers to any revisions older than
875 * OLDEST_DUMPED_REV, issue a warning and set *FOUND_OLD_MERGEINFO to TRUE,
876 * otherwise leave *FOUND_OLD_MERGEINFO unchanged.
877 */
878static svn_error_t *
879verify_mergeinfo_revisions(svn_boolean_t *found_old_mergeinfo,
880                           const char *mergeinfo_str,
881                           svn_revnum_t oldest_dumped_rev,
882                           svn_repos_notify_func_t notify_func,
883                           void *notify_baton,
884                           apr_pool_t *pool)
885{
886  svn_mergeinfo_t mergeinfo, old_mergeinfo;
887
888  SVN_ERR(svn_mergeinfo_parse(&mergeinfo, mergeinfo_str, pool));
889  SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
890            &old_mergeinfo, mergeinfo,
891            oldest_dumped_rev - 1, 0,
892            TRUE, pool, pool));
893
894  if (apr_hash_count(old_mergeinfo))
895    {
896      notify_warning(pool, notify_func, notify_baton,
897                     svn_repos_notify_warning_found_old_mergeinfo,
898                     _("Mergeinfo referencing revision(s) prior "
899                       "to the oldest dumped revision (r%ld). "
900                       "Loading this dump may result in invalid "
901                       "mergeinfo."),
902                     oldest_dumped_rev);
903
904      if (found_old_mergeinfo)
905        *found_old_mergeinfo = TRUE;
906    }
907
908  return SVN_NO_ERROR;
909}
910
/* Unique string pointers used by verify_mergeinfo_normalization()
   and check_name_collision().

   They serve as marker values in the normalized-path hash tables and are
   told apart by pointer identity, not by string content (see the
   "found == normalized_collision" test in
   verify_mergeinfo_normalization()). */
static const char normalized_unique[] = "normalized_unique";
static const char normalized_collision[] = "normalized_collision";
915
916
/* Baton for extract_mergeinfo_paths */
struct extract_mergeinfo_paths_baton
{
  /* Hash that receives one entry per visited key; every entry's value
     is the normalized_unique marker. */
  apr_hash_t *result;

  /* If TRUE, keys are run through svn_utf__normalize() before being
     inserted into RESULT; if FALSE they are inserted verbatim. */
  svn_boolean_t normalize;

  /* Work buffer passed to svn_utf__normalize().  BUFFER.pool is also the
     pool in which the keys stored in RESULT are duplicated. */
  svn_membuf_t buffer;
};
924
925/* Hash iterator that uniquifies all keys into a single hash table,
926   optionally normalizing them first. */
927static svn_error_t *
928extract_mergeinfo_paths(void *baton, const void *key, apr_ssize_t klen,
929                         void *val, apr_pool_t *iterpool)
930{
931  struct extract_mergeinfo_paths_baton *const xb = baton;
932  if (xb->normalize)
933    {
934      const char *normkey;
935      SVN_ERR(svn_utf__normalize(&normkey, key, klen, &xb->buffer));
936      svn_hash_sets(xb->result,
937                    apr_pstrdup(xb->buffer.pool, normkey),
938                    normalized_unique);
939    }
940  else
941    apr_hash_set(xb->result,
942                 apr_pmemdup(xb->buffer.pool, key, klen + 1), klen,
943                 normalized_unique);
944  return SVN_NO_ERROR;
945}
946
/* Baton for filter_mergeinfo_paths */
struct filter_mergeinfo_paths_baton
{
  /* Set of paths to filter; filter_mergeinfo_paths() removes from it
     every key it is called with. */
  apr_hash_t *paths;
};
952
953/* Compare two sets of denormalized paths from mergeinfo entries,
954   removing duplicates. */
955static svn_error_t *
956filter_mergeinfo_paths(void *baton, const void *key, apr_ssize_t klen,
957                       void *val, apr_pool_t *iterpool)
958{
959  struct filter_mergeinfo_paths_baton *const fb = baton;
960
961  if (apr_hash_get(fb->paths, key, klen))
962    apr_hash_set(fb->paths, key, klen, NULL);
963
964  return SVN_NO_ERROR;
965}
966
/* Baton used by the verify_mergeinfo_normalization() hash iterator;
   filled in by check_mergeinfo_normalization(). */
struct verify_mergeinfo_normalization_baton
{
  /* Path of the node whose svn:mergeinfo is being checked; only used
     in the text of the collision warning. */
  const char* path;

  /* Normalized paths seen so far, each mapped to either the
     normalized_unique or the normalized_collision marker. */
  apr_hash_t *normalized_paths;

  /* Work buffer passed to svn_utf__normalize().  BUFFER.pool is also the
     pool in which keys added to NORMALIZED_PATHS are duplicated. */
  svn_membuf_t buffer;

  /* Callback and its baton used to report collisions. */
  svn_repos_notify_func_t notify_func;
  void *notify_baton;
};
976
977/* Hash iterator that verifies normalization and collision of paths in
978   an svn:mergeinfo property. */
979static svn_error_t *
980verify_mergeinfo_normalization(void *baton, const void *key, apr_ssize_t klen,
981                               void *val, apr_pool_t *iterpool)
982{
983  struct verify_mergeinfo_normalization_baton *const vb = baton;
984
985  const char *const path = key;
986  const char *normpath;
987  const char *found;
988
989  SVN_ERR(svn_utf__normalize(&normpath, path, klen, &vb->buffer));
990  found = svn_hash_gets(vb->normalized_paths, normpath);
991  if (!found)
992      svn_hash_sets(vb->normalized_paths,
993                    apr_pstrdup(vb->buffer.pool, normpath),
994                    normalized_unique);
995  else if (found == normalized_collision)
996    /* Skip already reported collision */;
997  else
998    {
999      /* Report path collision in mergeinfo */
1000      svn_hash_sets(vb->normalized_paths,
1001                    apr_pstrdup(vb->buffer.pool, normpath),
1002                    normalized_collision);
1003
1004      notify_warning(iterpool, vb->notify_func, vb->notify_baton,
1005                     svn_repos_notify_warning_mergeinfo_collision,
1006                     _("Duplicate representation of path '%s'"
1007                       " in %s property of '%s'"),
1008                     normpath, SVN_PROP_MERGEINFO, vb->path);
1009    }
1010  return SVN_NO_ERROR;
1011}
1012
1013/* Check UCS normalization of mergeinfo for PATH. NEW_MERGEINFO is the
1014   svn:mergeinfo property value being set; OLD_MERGEINFO is the
1015   previous property value, which may be NULL. Only the paths that
1016   were added in are checked, including collision checks. This
1017   minimizes the number of notifications we generate for a given
1018   mergeinfo property. */
1019static svn_error_t *
1020check_mergeinfo_normalization(const char *path,
1021                              const char *new_mergeinfo,
1022                              const char *old_mergeinfo,
1023                              svn_repos_notify_func_t notify_func,
1024                              void *notify_baton,
1025                              apr_pool_t *pool)
1026{
1027  svn_mergeinfo_t mergeinfo;
1028  apr_hash_t *normalized_paths;
1029  apr_hash_t *added_paths;
1030  struct extract_mergeinfo_paths_baton extract_baton;
1031  struct verify_mergeinfo_normalization_baton verify_baton;
1032
1033  SVN_ERR(svn_mergeinfo_parse(&mergeinfo, new_mergeinfo, pool));
1034
1035  extract_baton.result = apr_hash_make(pool);
1036  extract_baton.normalize = FALSE;
1037  svn_membuf__create(&extract_baton.buffer, 0, pool);
1038  SVN_ERR(svn_iter_apr_hash(NULL, mergeinfo,
1039                            extract_mergeinfo_paths,
1040                            &extract_baton, pool));
1041  added_paths = extract_baton.result;
1042
1043  if (old_mergeinfo)
1044    {
1045      struct filter_mergeinfo_paths_baton filter_baton;
1046      svn_mergeinfo_t oldinfo;
1047
1048      extract_baton.result = apr_hash_make(pool);
1049      extract_baton.normalize = TRUE;
1050      SVN_ERR(svn_mergeinfo_parse(&oldinfo, old_mergeinfo, pool));
1051      SVN_ERR(svn_iter_apr_hash(NULL, oldinfo,
1052                                extract_mergeinfo_paths,
1053                                &extract_baton, pool));
1054      normalized_paths = extract_baton.result;
1055
1056      filter_baton.paths = added_paths;
1057      SVN_ERR(svn_iter_apr_hash(NULL, oldinfo,
1058                                filter_mergeinfo_paths,
1059                                &filter_baton, pool));
1060    }
1061  else
1062      normalized_paths = apr_hash_make(pool);
1063
1064  verify_baton.path = path;
1065  verify_baton.normalized_paths = normalized_paths;
1066  verify_baton.buffer = extract_baton.buffer;
1067  verify_baton.notify_func = notify_func;
1068  verify_baton.notify_baton = notify_baton;
1069  SVN_ERR(svn_iter_apr_hash(NULL, added_paths,
1070                            verify_mergeinfo_normalization,
1071                            &verify_baton, pool));
1072
1073  return SVN_NO_ERROR;
1074}
1075
1076
1077/* A special case of dump_node(), for a delete record.
1078 *
1079 * The only thing special about this version is it only writes one blank
1080 * line, not two, after the headers. Why? Historical precedent for the
1081 * case where a delete record is used as part of a (delete + add-with-history)
1082 * in implementing a replacement.
1083 *
1084 * Also it doesn't do a path-tracker check.
1085 */
1086static svn_error_t *
1087dump_node_delete(svn_stream_t *stream,
1088                 const char *node_relpath,
1089                 apr_pool_t *pool)
1090{
1091  svn_repos__dumpfile_headers_t *headers
1092    = svn_repos__dumpfile_headers_create(pool);
1093
1094  /* Node-path: ... */
1095  svn_repos__dumpfile_header_push(
1096    headers, SVN_REPOS_DUMPFILE_NODE_PATH, node_relpath);
1097
1098  /* Node-action: delete */
1099  svn_repos__dumpfile_header_push(
1100    headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "delete");
1101
1102  SVN_ERR(svn_repos__dump_headers(stream, headers, pool));
1103  return SVN_NO_ERROR;
1104}
1105
/* This helper is the main "meat" of the editor -- it does all the
   work of writing a node record.

   Write out a node record for PATH of type KIND under EB->FS_ROOT.
   ACTION describes what is happening to the node (see enum svn_node_action).
   Write record to writable EB->STREAM.

   If the node was itself copied, IS_COPY is TRUE and the
   path/revision of the copy source are in CMP_PATH/CMP_REV.  If
   IS_COPY is FALSE, yet CMP_PATH/CMP_REV are valid, this node is part
   of a copied subtree.

   When CMP_PATH/CMP_REV are not valid copy arguments, the node is
   compared against PATH in revision EB->CURRENT_REV - 1 instead.

   If EB->PATH_TRACKER is set, ACTION is also checked against the tracked
   tree state (the node must, or must not, exist already) and the tracker
   is updated to reflect the change.

   A replacement by a copy (ACTION is svn_node_action_replace and IS_COPY
   is TRUE) is written as two records: a delete record followed by an
   add-with-history record.

   If EB->VERIFY or EB->NOTIFY_FUNC is set, PATH is validated first; an
   invalid path is reported through EB->NOTIFY_FUNC (if set) and is
   returned as an error only if EB->VERIFY is set.

   Use POOL for all allocations.
  */
static svn_error_t *
dump_node(struct edit_baton *eb,
          const char *path,
          svn_node_kind_t kind,
          enum svn_node_action action,
          svn_boolean_t is_copy,
          const char *cmp_path,
          svn_revnum_t cmp_rev,
          apr_pool_t *pool)
{
  svn_stringbuf_t *propstring;
  apr_size_t len;
  svn_boolean_t must_dump_text = FALSE, must_dump_props = FALSE;
  /* Default comparison base: the same path in the preceding revision.
     Overridden below when CMP_PATH/CMP_REV are valid. */
  const char *compare_path = path;
  svn_revnum_t compare_rev = eb->current_rev - 1;
  /* Stays NULL unless one of the switch cases below opens the
     comparison revision root. */
  svn_fs_root_t *compare_root = NULL;
  apr_file_t *delta_file = NULL;
  svn_repos__dumpfile_headers_t *headers
    = svn_repos__dumpfile_headers_create(pool);
  svn_filesize_t textlen;

  /* Maybe validate the path. */
  if (eb->verify || eb->notify_func)
    {
      svn_error_t *err = svn_fs__path_valid(path, pool);

      if (err)
        {
          if (eb->notify_func)
            {
              char errbuf[512]; /* ### svn_strerror() magic number  */

              notify_warning(pool, eb->notify_func, eb->notify_baton,
                             svn_repos_notify_warning_invalid_fspath,
                             _("E%06d: While validating fspath '%s': %s"),
                             err->apr_err, path,
                             svn_err_best_message(err, errbuf, sizeof(errbuf)));
            }

          /* Return the error in addition to notifying about it. */
          if (eb->verify)
            return svn_error_trace(err);
          else
            svn_error_clear(err);
        }
    }

  /* Write out metadata headers for this node.  The Node-kind header is
     only written for files and directories. */
  svn_repos__dumpfile_header_push(
    headers, SVN_REPOS_DUMPFILE_NODE_PATH, path);
  if (kind == svn_node_file)
    svn_repos__dumpfile_header_push(
      headers, SVN_REPOS_DUMPFILE_NODE_KIND, "file");
  else if (kind == svn_node_dir)
    svn_repos__dumpfile_header_push(
      headers, SVN_REPOS_DUMPFILE_NODE_KIND, "dir");

  /* Remove leading slashes from copyfrom paths. */
  if (cmp_path)
    {
      const char *canonicalized_path;
      SVN_ERR(svn_relpath_canonicalize_safe(&canonicalized_path, NULL,
                                            cmp_path, pool, pool));
      cmp_path = canonicalized_path;
    }

  /* Validate the comparison path/rev. */
  if (ARE_VALID_COPY_ARGS(cmp_path, cmp_rev))
    {
      compare_path = cmp_path;
      compare_rev = cmp_rev;
    }

  /* Write the Node-action header (and copyfrom headers, if any), update
     the path tracker, and decide which content has to be dumped. */
  switch (action)
    {
    case svn_node_action_change:
      if (eb->path_tracker)
        SVN_ERR_W(node_must_exist(eb, path, eb->current_rev, kind, pool),
                  apr_psprintf(pool, _("Change invalid path '%s' in r%ld"),
                               path, eb->current_rev));

      svn_repos__dumpfile_header_push(
        headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "change");

      /* either the text or props changed, or possibly both. */
      SVN_ERR(svn_fs_revision_root(&compare_root,
                                   svn_fs_root_fs(eb->fs_root),
                                   compare_rev, pool));

      SVN_ERR(svn_fs_props_changed(&must_dump_props,
                                   compare_root, compare_path,
                                   eb->fs_root, path, pool));
      if (kind == svn_node_file)
        SVN_ERR(svn_fs_contents_changed(&must_dump_text,
                                        compare_root, compare_path,
                                        eb->fs_root, path, pool));
      break;

    case svn_node_action_delete:
      if (eb->path_tracker)
        {
          SVN_ERR_W(node_must_exist(eb, path, eb->current_rev, kind, pool),
                    apr_psprintf(pool, _("Deleting invalid path '%s' in r%ld"),
                                 path, eb->current_rev));
          tracker_path_delete(eb->path_tracker, path);
        }

      svn_repos__dumpfile_header_push(
        headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "delete");

      /* we can leave this routine quietly now, don't need to dump
         any content. */
      must_dump_text = FALSE;
      must_dump_props = FALSE;
      break;

    case svn_node_action_replace:
      /* The node kind is not checked here (svn_node_unknown is passed). */
      if (eb->path_tracker)
        SVN_ERR_W(node_must_exist(eb, path, eb->current_rev,
                                  svn_node_unknown, pool),
                  apr_psprintf(pool,
                               _("Replacing non-existent path '%s' in r%ld"),
                               path, eb->current_rev));

      if (! is_copy)
        {
          if (eb->path_tracker)
            tracker_path_replace(eb->path_tracker, path);

          /* a simple delete+add, implied by a single 'replace' action. */
          svn_repos__dumpfile_header_push(
            headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "replace");

          /* definitely need to dump all content for a replace. */
          if (kind == svn_node_file)
            must_dump_text = TRUE;
          must_dump_props = TRUE;
          break;
        }
      else
        {
          /* more complex:  delete original, then add-with-history.  */
          /* ### Why not write a 'replace' record? Don't know. */

          if (eb->path_tracker)
            {
              tracker_path_delete(eb->path_tracker, path);
            }

          /* ### Unusually, we end this 'delete' node record with only a single
                 blank line after the header block -- no extra blank line. */
          SVN_ERR(dump_node_delete(eb->stream, path, pool));

          /* The remaining action is a non-replacing add-with-history */
          /* action = svn_node_action_add; */
        }
      /* FALL THROUGH to 'add' */

    case svn_node_action_add:
      /* Also reached by falling through from a replace-by-copy; the
         tracker has already recorded the deletion in that case. */
      if (eb->path_tracker)
        SVN_ERR_W(node_must_not_exist(eb, path, eb->current_rev, pool),
                  apr_psprintf(pool,
                               _("Adding already existing path '%s' in r%ld"),
                               path, eb->current_rev));

      svn_repos__dumpfile_header_push(
        headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "add");

      if (! is_copy)
        {
          if (eb->path_tracker)
            tracker_path_add(eb->path_tracker, path);

          /* Dump all contents for a simple 'add'. */
          if (kind == svn_node_file)
            must_dump_text = TRUE;
          must_dump_props = TRUE;
        }
      else
        {
          if (eb->path_tracker)
            {
              SVN_ERR_W(node_must_exist(eb, compare_path, compare_rev,
                                        kind, pool),
                        apr_psprintf(pool,
                                     _("Copying from invalid path to "
                                       "'%s' in r%ld"),
                                     path, eb->current_rev));
              tracker_path_copy(eb->path_tracker, path, compare_path,
                                compare_rev);
            }

          /* When dumping (not verifying), warn about copy sources that
             lie before the dumped revision range. */
          if (!eb->verify && cmp_rev < eb->oldest_dumped_rev
              && eb->notify_func)
            {
              notify_warning(pool, eb->notify_func, eb->notify_baton,
                             svn_repos_notify_warning_found_old_reference,
                             _("Referencing data in revision %ld,"
                               " which is older than the oldest"
                               " dumped revision (r%ld).  Loading this dump"
                               " into an empty repository"
                               " will fail."),
                             cmp_rev, eb->oldest_dumped_rev);
              if (eb->found_old_reference)
                *eb->found_old_reference = TRUE;
            }

          svn_repos__dumpfile_header_pushf(
            headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV, "%ld", cmp_rev);
          svn_repos__dumpfile_header_push(
            headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH, cmp_path);

          SVN_ERR(svn_fs_revision_root(&compare_root,
                                       svn_fs_root_fs(eb->fs_root),
                                       compare_rev, pool));

          /* Need to decide if the copied node had any extra textual or
             property mods as well.  */
          SVN_ERR(svn_fs_props_changed(&must_dump_props,
                                       compare_root, compare_path,
                                       eb->fs_root, path, pool));
          if (kind == svn_node_file)
            {
              svn_checksum_t *checksum;
              const char *hex_digest;
              SVN_ERR(svn_fs_contents_changed(&must_dump_text,
                                              compare_root, compare_path,
                                              eb->fs_root, path, pool));

              /* Record the checksums of the copy source; each header is
                 only written if a digest string could be produced. */
              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                svn_repos__dumpfile_header_push(
                  headers, SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_MD5, hex_digest);

              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                svn_repos__dumpfile_header_push(
                  headers, SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_SHA1, hex_digest);
            }
        }
      break;
    }

  if ((! must_dump_text) && (! must_dump_props))
    {
      /* If we're not supposed to dump text or props, so be it, we can
         just go home.  However, if either one needs to be dumped,
         then our dumpstream format demands that at a *minimum*, we
         see a lone "PROPS-END" as a divider between text and props
         content within the content-block. */
      SVN_ERR(svn_repos__dump_headers(eb->stream, headers, pool));
      len = 1;
      return svn_stream_write(eb->stream, "\n", &len); /* ### needed? */
    }

  /*** Start prepping content to dump... ***/

  /* If we are supposed to dump properties, write out a property
     length header and generate a stringbuf that contains those
     property values here. */
  if (must_dump_props)
    {
      apr_hash_t *prophash, *oldhash = NULL;
      svn_stream_t *propstream;

      SVN_ERR(svn_fs_node_proplist(&prophash, eb->fs_root, path, pool));

      /* If this is a partial dump, then issue a warning if we dump mergeinfo
         properties that refer to revisions older than the first revision
         dumped. */
      if (!eb->verify && eb->notify_func && eb->oldest_dumped_rev > 1)
        {
          svn_string_t *mergeinfo_str = svn_hash_gets(prophash,
                                                      SVN_PROP_MERGEINFO);
          if (mergeinfo_str)
            {
              /* An error in verifying the mergeinfo must not prevent dumping
                 the data. Ignore any such error. */
              svn_error_clear(verify_mergeinfo_revisions(
                                eb->found_old_mergeinfo,
                                mergeinfo_str->data, eb->oldest_dumped_rev,
                                eb->notify_func, eb->notify_baton,
                                pool));
            }
        }

      /* If we're checking UCS normalization, also parse any changed
         mergeinfo and warn about denormalized paths and name
         collisions there. */
      if (eb->verify && eb->check_normalization && eb->notify_func)
        {
          /* N.B.: This hash lookup happens only once; the conditions
             for verifying historic mergeinfo references and checking
             UCS normalization are mutually exclusive. */
          svn_string_t *mergeinfo_str = svn_hash_gets(prophash,
                                                      SVN_PROP_MERGEINFO);
          if (mergeinfo_str)
            {
              svn_string_t *oldinfo_str = NULL;
              /* The previous property value is only available if a
                 comparison root was opened above. */
              if (compare_root)
                {
                  SVN_ERR(svn_fs_node_proplist(&oldhash,
                                               compare_root, compare_path,
                                               pool));
                  oldinfo_str = svn_hash_gets(oldhash, SVN_PROP_MERGEINFO);
                }
              SVN_ERR(check_mergeinfo_normalization(
                          path, mergeinfo_str->data,
                          (oldinfo_str ? oldinfo_str->data : NULL),
                          eb->notify_func, eb->notify_baton, pool));
            }
        }

      if (eb->use_deltas && compare_root)
        {
          /* Fetch the old property hash to diff against and output a header
             saying that our property contents are a delta. */
          if (!oldhash)         /* May have been set for normalization check */
            SVN_ERR(svn_fs_node_proplist(&oldhash, compare_root, compare_path,
                                         pool));
          svn_repos__dumpfile_header_push(
            headers, SVN_REPOS_DUMPFILE_PROP_DELTA, "true");
        }
      else
        /* Not a delta: diff against an empty property list.  This also
           discards any old property list fetched for the normalization
           check above. */
        oldhash = apr_hash_make(pool);
      propstring = svn_stringbuf_create_ensure(0, pool);
      propstream = svn_stream_from_stringbuf(propstring, pool);
      SVN_ERR(svn_hash_write_incremental(prophash, oldhash, propstream,
                                         "PROPS-END", pool));
      SVN_ERR(svn_stream_close(propstream));
    }

  /* If we are supposed to dump text, write out a text length header
     here, and an MD5 checksum (if available). */
  if (must_dump_text && (kind == svn_node_file))
    {
      svn_checksum_t *checksum;
      const char *hex_digest;

      if (eb->use_deltas)
        {
          /* Compute the text delta now and write it into a temporary
             file, so that we can find its length.  Output a header
             saying our text contents are a delta. */
          SVN_ERR(store_delta(&delta_file, &textlen, compare_root,
                              compare_path, eb->fs_root, path, pool));
          svn_repos__dumpfile_header_push(
            headers, SVN_REPOS_DUMPFILE_TEXT_DELTA, "true");

          /* Checksums of the delta base are only written when there is
             a comparison root to read them from. */
          if (compare_root)
            {
              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                svn_repos__dumpfile_header_push(
                  headers, SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_MD5, hex_digest);

              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                svn_repos__dumpfile_header_push(
                  headers, SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_SHA1, hex_digest);
            }
        }
      else
        {
          /* Just fetch the length of the file. */
          SVN_ERR(svn_fs_file_length(&textlen, eb->fs_root, path, pool));
        }

      /* Checksums of the new file content. */
      SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                   eb->fs_root, path, FALSE, pool));
      hex_digest = svn_checksum_to_cstring(checksum, pool);
      if (hex_digest)
        svn_repos__dumpfile_header_push(
          headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_MD5, hex_digest);

      SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                   eb->fs_root, path, FALSE, pool));
      hex_digest = svn_checksum_to_cstring(checksum, pool);
      if (hex_digest)
        svn_repos__dumpfile_header_push(
          headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_SHA1, hex_digest);
    }

  /* 'Content-length:' is the last header before we dump the content,
     and is the sum of the text and prop contents lengths.  We write
     this only for the benefit of non-Subversion RFC-822 parsers.

     PROPSTRING and TEXTLEN are only assigned when the corresponding
     must_dump_* flag is set, hence the guards.  MUST_DUMP_TEXT is only
     ever set for file nodes (see the switch above), so TEXTLEN has been
     computed whenever it is read here. */
  SVN_ERR(svn_repos__dump_node_record(eb->stream, headers,
                                      must_dump_props ? propstring : NULL,
                                      must_dump_text,
                                      must_dump_text ? textlen : 0,
                                      TRUE /*content_length_always*/,
                                      pool));

  /* Dump text content */
  if (must_dump_text && (kind == svn_node_file))
    {
      svn_stream_t *contents;

      /* DELTA_FILE is only set if store_delta() was called above. */
      if (delta_file)
        {
          /* Make sure to close the underlying file when the stream is
             closed. */
          contents = svn_stream_from_aprfile2(delta_file, FALSE, pool);
        }
      else
        SVN_ERR(svn_fs_file_contents(&contents, eb->fs_root, path, pool));

      /* Copy into a disowned wrapper of EB->STREAM, since we keep
         writing to EB->STREAM below. */
      SVN_ERR(svn_stream_copy3(contents, svn_stream_disown(eb->stream, pool),
                               NULL, NULL, pool));
    }

  len = 2;
  return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */
}
1544
1545
1546static svn_error_t *
1547open_root(void *edit_baton,
1548          svn_revnum_t base_revision,
1549          apr_pool_t *pool,
1550          void **root_baton)
1551{
1552  return svn_error_trace(make_dir_baton((struct dir_baton **)root_baton,
1553                                        NULL, NULL, SVN_INVALID_REVNUM,
1554                                        edit_baton, NULL, pool));
1555}
1556
1557
1558static svn_error_t *
1559delete_entry(const char *path,
1560             svn_revnum_t revision,
1561             void *parent_baton,
1562             apr_pool_t *pool)
1563{
1564  struct dir_baton *pb = parent_baton;
1565  const char *mypath = apr_pstrdup(pb->pool, path);
1566
1567  /* remember this path needs to be deleted. */
1568  svn_hash_sets(pb->deleted_entries, mypath, pb);
1569
1570  return SVN_NO_ERROR;
1571}
1572
1573
1574static svn_error_t *
1575add_directory(const char *path,
1576              void *parent_baton,
1577              const char *copyfrom_path,
1578              svn_revnum_t copyfrom_rev,
1579              apr_pool_t *pool,
1580              void **child_baton)
1581{
1582  struct dir_baton *pb = parent_baton;
1583  struct edit_baton *eb = pb->edit_baton;
1584  void *was_deleted;
1585  svn_boolean_t is_copy = FALSE;
1586  struct dir_baton *new_db;
1587
1588  SVN_ERR(make_dir_baton(&new_db, path, copyfrom_path, copyfrom_rev, eb,
1589                         pb, pool));
1590
1591  /* This might be a replacement -- is the path already deleted? */
1592  was_deleted = svn_hash_gets(pb->deleted_entries, path);
1593
1594  /* Detect an add-with-history. */
1595  is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev);
1596
1597  /* Dump the node. */
1598  SVN_ERR(dump_node(eb, path,
1599                    svn_node_dir,
1600                    was_deleted ? svn_node_action_replace : svn_node_action_add,
1601                    is_copy,
1602                    is_copy ? copyfrom_path : NULL,
1603                    is_copy ? copyfrom_rev : SVN_INVALID_REVNUM,
1604                    pool));
1605
1606  if (was_deleted)
1607    /* Delete the path, it's now been dumped. */
1608    svn_hash_sets(pb->deleted_entries, path, NULL);
1609
1610  /* Check for normalized name clashes, but only if this is actually a
1611     new name in the parent, not a replacement. */
1612  if (!was_deleted && eb->verify && eb->check_normalization && eb->notify_func)
1613    {
1614      pb->check_name_collision = TRUE;
1615    }
1616
1617  new_db->written_out = TRUE;
1618
1619  *child_baton = new_db;
1620  return SVN_NO_ERROR;
1621}
1622
1623
1624static svn_error_t *
1625open_directory(const char *path,
1626               void *parent_baton,
1627               svn_revnum_t base_revision,
1628               apr_pool_t *pool,
1629               void **child_baton)
1630{
1631  struct dir_baton *pb = parent_baton;
1632  struct edit_baton *eb = pb->edit_baton;
1633  struct dir_baton *new_db;
1634  const char *cmp_path = NULL;
1635  svn_revnum_t cmp_rev = SVN_INVALID_REVNUM;
1636
1637  /* If the parent directory has explicit comparison path and rev,
1638     record the same for this one. */
1639  if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev))
1640    {
1641      cmp_path = svn_relpath_join(pb->cmp_path,
1642                                  svn_relpath_basename(path, pool), pool);
1643      cmp_rev = pb->cmp_rev;
1644    }
1645
1646  SVN_ERR(make_dir_baton(&new_db, path, cmp_path, cmp_rev, eb, pb, pool));
1647  *child_baton = new_db;
1648  return SVN_NO_ERROR;
1649}
1650
1651
1652static svn_error_t *
1653close_directory(void *dir_baton,
1654                apr_pool_t *pool)
1655{
1656  struct dir_baton *db = dir_baton;
1657  struct edit_baton *eb = db->edit_baton;
1658  apr_pool_t *subpool = svn_pool_create(pool);
1659  int i;
1660  apr_array_header_t *sorted_entries;
1661
1662  /* Sort entries lexically instead of as paths. Even though the entries
1663   * are full paths they're all in the same directory (see comment in struct
1664   * dir_baton definition). So we really want to sort by basename, in which
1665   * case the lexical sort function is more efficient. */
1666  sorted_entries = svn_sort__hash(db->deleted_entries,
1667                                  svn_sort_compare_items_lexically, pool);
1668  for (i = 0; i < sorted_entries->nelts; i++)
1669    {
1670      const char *path = APR_ARRAY_IDX(sorted_entries, i,
1671                                       svn_sort__item_t).key;
1672
1673      svn_pool_clear(subpool);
1674
1675      /* By sending 'svn_node_unknown', the Node-kind: header simply won't
1676         be written out.  No big deal at all, really.  The loader
1677         shouldn't care.  */
1678      SVN_ERR(dump_node(eb, path,
1679                        svn_node_unknown, svn_node_action_delete,
1680                        FALSE, NULL, SVN_INVALID_REVNUM, subpool));
1681    }
1682
1683  svn_pool_destroy(subpool);
1684  return SVN_NO_ERROR;
1685}
1686
1687
1688static svn_error_t *
1689add_file(const char *path,
1690         void *parent_baton,
1691         const char *copyfrom_path,
1692         svn_revnum_t copyfrom_rev,
1693         apr_pool_t *pool,
1694         void **file_baton)
1695{
1696  struct dir_baton *pb = parent_baton;
1697  struct edit_baton *eb = pb->edit_baton;
1698  void *was_deleted;
1699  svn_boolean_t is_copy = FALSE;
1700
1701  /* This might be a replacement -- is the path already deleted? */
1702  was_deleted = svn_hash_gets(pb->deleted_entries, path);
1703
1704  /* Detect add-with-history. */
1705  is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev);
1706
1707  /* Dump the node. */
1708  SVN_ERR(dump_node(eb, path,
1709                    svn_node_file,
1710                    was_deleted ? svn_node_action_replace : svn_node_action_add,
1711                    is_copy,
1712                    is_copy ? copyfrom_path : NULL,
1713                    is_copy ? copyfrom_rev : SVN_INVALID_REVNUM,
1714                    pool));
1715
1716  if (was_deleted)
1717    /* delete the path, it's now been dumped. */
1718    svn_hash_sets(pb->deleted_entries, path, NULL);
1719
1720  /* Check for normalized name clashes, but only if this is actually a
1721     new name in the parent, not a replacement. */
1722  if (!was_deleted && eb->verify && eb->check_normalization && eb->notify_func)
1723    {
1724      pb->check_name_collision = TRUE;
1725    }
1726
1727  *file_baton = NULL;  /* muhahahaha */
1728  return SVN_NO_ERROR;
1729}
1730
1731
1732static svn_error_t *
1733open_file(const char *path,
1734          void *parent_baton,
1735          svn_revnum_t ancestor_revision,
1736          apr_pool_t *pool,
1737          void **file_baton)
1738{
1739  struct dir_baton *pb = parent_baton;
1740  struct edit_baton *eb = pb->edit_baton;
1741  const char *cmp_path = NULL;
1742  svn_revnum_t cmp_rev = SVN_INVALID_REVNUM;
1743
1744  /* If the parent directory has explicit comparison path and rev,
1745     record the same for this one. */
1746  if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev))
1747    {
1748      cmp_path = svn_relpath_join(pb->cmp_path,
1749                                  svn_relpath_basename(path, pool), pool);
1750      cmp_rev = pb->cmp_rev;
1751    }
1752
1753  SVN_ERR(dump_node(eb, path,
1754                    svn_node_file, svn_node_action_change,
1755                    FALSE, cmp_path, cmp_rev, pool));
1756
1757  *file_baton = NULL;  /* muhahahaha again */
1758  return SVN_NO_ERROR;
1759}
1760
1761
1762static svn_error_t *
1763change_dir_prop(void *parent_baton,
1764                const char *name,
1765                const svn_string_t *value,
1766                apr_pool_t *pool)
1767{
1768  struct dir_baton *db = parent_baton;
1769  struct edit_baton *eb = db->edit_baton;
1770
1771  /* This function is what distinguishes between a directory that is
1772     opened to merely get somewhere, vs. one that is opened because it
1773     *actually* changed by itself.
1774
1775     Instead of recording the prop changes here, we just use this method
1776     to trigger writing the node; dump_node() finds all the changes. */
1777  if (! db->written_out)
1778    {
1779      SVN_ERR(dump_node(eb, db->path,
1780                        svn_node_dir, svn_node_action_change,
1781                        /* ### We pass is_copy=FALSE; this might be wrong
1782                           but the parameter isn't used when action=change. */
1783                        FALSE, db->cmp_path, db->cmp_rev, pool));
1784      db->written_out = TRUE;
1785    }
1786  return SVN_NO_ERROR;
1787}
1788
1789static svn_error_t *
1790fetch_props_func(apr_hash_t **props,
1791                 void *baton,
1792                 const char *path,
1793                 svn_revnum_t base_revision,
1794                 apr_pool_t *result_pool,
1795                 apr_pool_t *scratch_pool)
1796{
1797  struct edit_baton *eb = baton;
1798  svn_error_t *err;
1799  svn_fs_root_t *fs_root;
1800
1801  if (!SVN_IS_VALID_REVNUM(base_revision))
1802    base_revision = eb->current_rev - 1;
1803
1804  SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
1805
1806  err = svn_fs_node_proplist(props, fs_root, path, result_pool);
1807  if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND)
1808    {
1809      svn_error_clear(err);
1810      *props = apr_hash_make(result_pool);
1811      return SVN_NO_ERROR;
1812    }
1813  else if (err)
1814    return svn_error_trace(err);
1815
1816  return SVN_NO_ERROR;
1817}
1818
1819static svn_error_t *
1820fetch_kind_func(svn_node_kind_t *kind,
1821                void *baton,
1822                const char *path,
1823                svn_revnum_t base_revision,
1824                apr_pool_t *scratch_pool)
1825{
1826  struct edit_baton *eb = baton;
1827  svn_fs_root_t *fs_root;
1828
1829  if (!SVN_IS_VALID_REVNUM(base_revision))
1830    base_revision = eb->current_rev - 1;
1831
1832  SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
1833
1834  SVN_ERR(svn_fs_check_path(kind, fs_root, path, scratch_pool));
1835
1836  return SVN_NO_ERROR;
1837}
1838
1839static svn_error_t *
1840fetch_base_func(const char **filename,
1841                void *baton,
1842                const char *path,
1843                svn_revnum_t base_revision,
1844                apr_pool_t *result_pool,
1845                apr_pool_t *scratch_pool)
1846{
1847  struct edit_baton *eb = baton;
1848  svn_stream_t *contents;
1849  svn_stream_t *file_stream;
1850  const char *tmp_filename;
1851  svn_error_t *err;
1852  svn_fs_root_t *fs_root;
1853
1854  if (!SVN_IS_VALID_REVNUM(base_revision))
1855    base_revision = eb->current_rev - 1;
1856
1857  SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
1858
1859  err = svn_fs_file_contents(&contents, fs_root, path, scratch_pool);
1860  if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND)
1861    {
1862      svn_error_clear(err);
1863      *filename = NULL;
1864      return SVN_NO_ERROR;
1865    }
1866  else if (err)
1867    return svn_error_trace(err);
1868  SVN_ERR(svn_stream_open_unique(&file_stream, &tmp_filename, NULL,
1869                                 svn_io_file_del_on_pool_cleanup,
1870                                 scratch_pool, scratch_pool));
1871  SVN_ERR(svn_stream_copy3(contents, file_stream, NULL, NULL, scratch_pool));
1872
1873  *filename = apr_pstrdup(result_pool, tmp_filename);
1874
1875  return SVN_NO_ERROR;
1876}
1877
1878
1879static svn_error_t *
1880get_dump_editor(const svn_delta_editor_t **editor,
1881                void **edit_baton,
1882                svn_fs_t *fs,
1883                svn_revnum_t to_rev,
1884                const char *root_path,
1885                svn_stream_t *stream,
1886                svn_boolean_t *found_old_reference,
1887                svn_boolean_t *found_old_mergeinfo,
1888                svn_error_t *(*custom_close_directory)(void *dir_baton,
1889                                  apr_pool_t *scratch_pool),
1890                svn_repos_notify_func_t notify_func,
1891                void *notify_baton,
1892                svn_revnum_t oldest_dumped_rev,
1893                svn_boolean_t use_deltas,
1894                svn_boolean_t verify,
1895                svn_boolean_t check_normalization,
1896                apr_pool_t *pool)
1897{
1898  /* Allocate an edit baton to be stored in every directory baton.
1899     Set it up for the directory baton we create here, which is the
1900     root baton. */
1901  struct edit_baton *eb = apr_pcalloc(pool, sizeof(*eb));
1902  svn_delta_editor_t *dump_editor = svn_delta_default_editor(pool);
1903  svn_delta_shim_callbacks_t *shim_callbacks =
1904                                svn_delta_shim_callbacks_default(pool);
1905
1906  /* Set up the edit baton. */
1907  eb->stream = stream;
1908  eb->notify_func = notify_func;
1909  eb->notify_baton = notify_baton;
1910  eb->oldest_dumped_rev = oldest_dumped_rev;
1911  eb->path = apr_pstrdup(pool, root_path);
1912  SVN_ERR(svn_fs_revision_root(&(eb->fs_root), fs, to_rev, pool));
1913  eb->fs = fs;
1914  eb->current_rev = to_rev;
1915  eb->use_deltas = use_deltas;
1916  eb->verify = verify;
1917  eb->check_normalization = check_normalization;
1918  eb->found_old_reference = found_old_reference;
1919  eb->found_old_mergeinfo = found_old_mergeinfo;
1920
1921  /* In non-verification mode, we will allow anything to be dumped because
1922     it might be an incremental dump with possible manual intervention.
1923     Also, this might be the last resort when it comes to data recovery.
1924
1925     Else, make sure that all paths exists at their respective revisions.
1926  */
1927  eb->path_tracker = verify ? tracker_create(to_rev, pool) : NULL;
1928
1929  /* Set up the editor. */
1930  dump_editor->open_root = open_root;
1931  dump_editor->delete_entry = delete_entry;
1932  dump_editor->add_directory = add_directory;
1933  dump_editor->open_directory = open_directory;
1934  if (custom_close_directory)
1935    dump_editor->close_directory = custom_close_directory;
1936  else
1937    dump_editor->close_directory = close_directory;
1938  dump_editor->change_dir_prop = change_dir_prop;
1939  dump_editor->add_file = add_file;
1940  dump_editor->open_file = open_file;
1941
1942  *edit_baton = eb;
1943  *editor = dump_editor;
1944
1945  shim_callbacks->fetch_kind_func = fetch_kind_func;
1946  shim_callbacks->fetch_props_func = fetch_props_func;
1947  shim_callbacks->fetch_base_func = fetch_base_func;
1948  shim_callbacks->fetch_baton = eb;
1949
1950  SVN_ERR(svn_editor__insert_shims(editor, edit_baton, *editor, *edit_baton,
1951                                   NULL, NULL, shim_callbacks, pool, pool));
1952
1953  return SVN_NO_ERROR;
1954}
1955
1956/*----------------------------------------------------------------------*/
1957
1958/** The main dumping routine, svn_repos_dump_fs. **/
1959
1960
1961/* Helper for svn_repos_dump_fs.
1962
1963   Write a revision record of REV in REPOS to writable STREAM, using POOL.
1964   Dump revision properties as well if INCLUDE_REVPROPS has been set.
1965   AUTHZ_FUNC and AUTHZ_BATON are passed directly to the repos layer.
1966 */
1967static svn_error_t *
1968write_revision_record(svn_stream_t *stream,
1969                      svn_repos_t *repos,
1970                      svn_revnum_t rev,
1971                      svn_boolean_t include_revprops,
1972                      svn_repos_authz_func_t authz_func,
1973                      void *authz_baton,
1974                      apr_pool_t *pool)
1975{
1976  apr_hash_t *props;
1977
1978  if (include_revprops)
1979    {
1980      SVN_ERR(svn_repos_fs_revision_proplist(&props, repos, rev,
1981                                             authz_func, authz_baton, pool));
1982    }
1983   else
1984    {
1985      /* Although we won't use it, we still need this container for the
1986         call below. */
1987      props = apr_hash_make(pool);
1988    }
1989
1990  SVN_ERR(svn_repos__dump_revision_record(stream, rev, NULL, props,
1991                                          include_revprops,
1992                                          pool));
1993  return SVN_NO_ERROR;
1994}
1995
/* Baton for dump_filter_authz_func().  It carries the caller-supplied
   dump filter so that it can be invoked from behind the repos layer's
   authz callback interface. */
typedef struct dump_filter_baton_t
{
  /* The dump filter to forward authz queries to. */
  svn_repos_dump_filter_func_t filter_func;
  /* Opaque baton passed to FILTER_FUNC on every call. */
  void *filter_baton;
} dump_filter_baton_t;
2002
2003/* Implements svn_repos_authz_func_t. */
2004static svn_error_t *
2005dump_filter_authz_func(svn_boolean_t *allowed,
2006                       svn_fs_root_t *root,
2007                       const char *path,
2008                       void *baton,
2009                       apr_pool_t *pool)
2010{
2011  dump_filter_baton_t *b = baton;
2012
2013  /* For some nodes (e.g. files under copied directory) PATH may be
2014   * non-canonical (missing leading '/').  Canonicalize PATH before
2015   * passing it to FILTER_FUNC. */
2016  path = svn_fspath__canonicalize(path, pool);
2017
2018  return svn_error_trace(b->filter_func(allowed, root, path, b->filter_baton,
2019                                        pool));
2020}
2021
2022
2023
2024/* The main dumper. */
2025svn_error_t *
2026svn_repos_dump_fs4(svn_repos_t *repos,
2027                   svn_stream_t *stream,
2028                   svn_revnum_t start_rev,
2029                   svn_revnum_t end_rev,
2030                   svn_boolean_t incremental,
2031                   svn_boolean_t use_deltas,
2032                   svn_boolean_t include_revprops,
2033                   svn_boolean_t include_changes,
2034                   svn_repos_notify_func_t notify_func,
2035                   void *notify_baton,
2036                   svn_repos_dump_filter_func_t filter_func,
2037                   void *filter_baton,
2038                   svn_cancel_func_t cancel_func,
2039                   void *cancel_baton,
2040                   apr_pool_t *pool)
2041{
2042  const svn_delta_editor_t *dump_editor;
2043  void *dump_edit_baton = NULL;
2044  svn_revnum_t rev;
2045  svn_fs_t *fs = svn_repos_fs(repos);
2046  apr_pool_t *iterpool = svn_pool_create(pool);
2047  svn_revnum_t youngest;
2048  const char *uuid;
2049  int version;
2050  svn_boolean_t found_old_reference = FALSE;
2051  svn_boolean_t found_old_mergeinfo = FALSE;
2052  svn_repos_notify_t *notify;
2053  svn_repos_authz_func_t authz_func;
2054  dump_filter_baton_t authz_baton = {0};
2055
2056  /* Make sure we catch up on the latest revprop changes.  This is the only
2057   * time we will refresh the revprop data in this query. */
2058  SVN_ERR(svn_fs_refresh_revision_props(fs, pool));
2059
2060  /* Determine the current youngest revision of the filesystem. */
2061  SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool));
2062
2063  /* Use default vals if necessary. */
2064  if (! SVN_IS_VALID_REVNUM(start_rev))
2065    start_rev = 0;
2066  if (! SVN_IS_VALID_REVNUM(end_rev))
2067    end_rev = youngest;
2068  if (! stream)
2069    stream = svn_stream_empty(pool);
2070
2071  /* Validate the revisions. */
2072  if (start_rev > end_rev)
2073    return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
2074                             _("Start revision %ld"
2075                               " is greater than end revision %ld"),
2076                             start_rev, end_rev);
2077  if (end_rev > youngest)
2078    return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
2079                             _("End revision %ld is invalid "
2080                               "(youngest revision is %ld)"),
2081                             end_rev, youngest);
2082
2083  /* We use read authz callback to implement dump filtering. If there is no
2084   * read access for some node, it will be excluded from dump as well as
2085   * references to it (e.g. copy source). */
2086  if (filter_func)
2087    {
2088      authz_func = dump_filter_authz_func;
2089      authz_baton.filter_func = filter_func;
2090      authz_baton.filter_baton = filter_baton;
2091    }
2092  else
2093    {
2094      authz_func = NULL;
2095    }
2096
2097  /* Write out the UUID. */
2098  SVN_ERR(svn_fs_get_uuid(fs, &uuid, pool));
2099
2100  /* If we're not using deltas, use the previous version, for
2101     compatibility with svn 1.0.x. */
2102  version = SVN_REPOS_DUMPFILE_FORMAT_VERSION;
2103  if (!use_deltas)
2104    version--;
2105
2106  /* Write out "general" metadata for the dumpfile.  In this case, a
2107     magic header followed by a dumpfile format version. */
2108  SVN_ERR(svn_repos__dump_magic_header_record(stream, version, pool));
2109  SVN_ERR(svn_repos__dump_uuid_header_record(stream, uuid, pool));
2110
2111  /* Create a notify object that we can reuse in the loop. */
2112  if (notify_func)
2113    notify = svn_repos_notify_create(svn_repos_notify_dump_rev_end,
2114                                     pool);
2115
2116  /* Main loop:  we're going to dump revision REV.  */
2117  for (rev = start_rev; rev <= end_rev; rev++)
2118    {
2119      svn_fs_root_t *to_root;
2120      svn_boolean_t use_deltas_for_rev;
2121
2122      svn_pool_clear(iterpool);
2123
2124      /* Check for cancellation. */
2125      if (cancel_func)
2126        SVN_ERR(cancel_func(cancel_baton));
2127
2128      /* Write the revision record. */
2129      SVN_ERR(write_revision_record(stream, repos, rev, include_revprops,
2130                                    authz_func, &authz_baton, iterpool));
2131
2132      /* When dumping revision 0, we just write out the revision record.
2133         The parser might want to use its properties.
2134         If we don't want revision changes at all, skip in any case. */
2135      if (rev == 0 || !include_changes)
2136        goto loop_end;
2137
2138      /* Fetch the editor which dumps nodes to a file.  Regardless of
2139         what we've been told, don't use deltas for the first rev of a
2140         non-incremental dump. */
2141      use_deltas_for_rev = use_deltas && (incremental || rev != start_rev);
2142      SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton, fs, rev,
2143                              "", stream, &found_old_reference,
2144                              &found_old_mergeinfo, NULL,
2145                              notify_func, notify_baton,
2146                              start_rev, use_deltas_for_rev, FALSE, FALSE,
2147                              iterpool));
2148
2149      /* Drive the editor in one way or another. */
2150      SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, iterpool));
2151
2152      /* If this is the first revision of a non-incremental dump,
2153         we're in for a full tree dump.  Otherwise, we want to simply
2154         replay the revision.  */
2155      if ((rev == start_rev) && (! incremental))
2156        {
2157          /* Compare against revision 0, so everything appears to be added. */
2158          svn_fs_root_t *from_root;
2159          SVN_ERR(svn_fs_revision_root(&from_root, fs, 0, iterpool));
2160          SVN_ERR(svn_repos_dir_delta2(from_root, "", "",
2161                                       to_root, "",
2162                                       dump_editor, dump_edit_baton,
2163                                       authz_func, &authz_baton,
2164                                       FALSE, /* don't send text-deltas */
2165                                       svn_depth_infinity,
2166                                       FALSE, /* don't send entry props */
2167                                       FALSE, /* don't ignore ancestry */
2168                                       iterpool));
2169        }
2170      else
2171        {
2172          /* The normal case: compare consecutive revs. */
2173          SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE,
2174                                    dump_editor, dump_edit_baton,
2175                                    authz_func, &authz_baton, iterpool));
2176
2177          /* While our editor close_edit implementation is a no-op, we still
2178             do this for completeness. */
2179          SVN_ERR(dump_editor->close_edit(dump_edit_baton, iterpool));
2180        }
2181
2182    loop_end:
2183      if (notify_func)
2184        {
2185          notify->revision = rev;
2186          notify_func(notify_baton, notify, iterpool);
2187        }
2188    }
2189
2190  if (notify_func)
2191    {
2192      /* Did we issue any warnings about references to revisions older than
2193         the oldest dumped revision?  If so, then issue a final generic
2194         warning, since the inline warnings already issued might easily be
2195         missed. */
2196
2197      notify = svn_repos_notify_create(svn_repos_notify_dump_end, iterpool);
2198      notify_func(notify_baton, notify, iterpool);
2199
2200      if (found_old_reference)
2201        {
2202          notify_warning(iterpool, notify_func, notify_baton,
2203                         svn_repos_notify_warning_found_old_reference,
2204                         _("The range of revisions dumped "
2205                           "contained references to "
2206                           "copy sources outside that "
2207                           "range."));
2208        }
2209
2210      /* Ditto if we issued any warnings about old revisions referenced
2211         in dumped mergeinfo. */
2212      if (found_old_mergeinfo)
2213        {
2214          notify_warning(iterpool, notify_func, notify_baton,
2215                         svn_repos_notify_warning_found_old_mergeinfo,
2216                         _("The range of revisions dumped "
2217                           "contained mergeinfo "
2218                           "which reference revisions outside "
2219                           "that range."));
2220        }
2221    }
2222
2223  svn_pool_destroy(iterpool);
2224
2225  return SVN_NO_ERROR;
2226}
2227
2228
2229/*----------------------------------------------------------------------*/
2230
2231/* verify, based on dump */
2232
2233
/* Creating a new revision that changes /A/B/E/bravo means creating new
   directory listings for /, /A, /A/B, and /A/B/E in the new revision, where
   each entry not changed in the new revision is a link back to the entry in
   a previous revision.  Replaying a revision with svn_repos_replay() does
   not verify that those links are correct.
2239
2240   For paths actually changed in the revision we verify, we get directory
2241   contents or file length twice: once in the dump editor, and once here.
2242   We could create a new verify baton, store in it the changed paths, and
2243   skip those here, but that means building an entire wrapper editor and
2244   managing two levels of batons.  The impact from checking these entries
2245   twice should be minimal, while the code to avoid it is not.
2246*/
2247
2248static svn_error_t *
2249verify_directory_entry(void *baton, const void *key, apr_ssize_t klen,
2250                       void *val, apr_pool_t *pool)
2251{
2252  struct dir_baton *db = baton;
2253  svn_fs_dirent_t *dirent = (svn_fs_dirent_t *)val;
2254  char *path;
2255  svn_boolean_t right_kind;
2256
2257  path = svn_relpath_join(db->path, (const char *)key, pool);
2258
2259  /* since we can't access the directory entries directly by their ID,
2260     we need to navigate from the FS_ROOT to them (relatively expensive
2261     because we may start at a never rev than the last change to node).
2262     We check that the node kind stored in the noderev matches the dir
2263     entry.  This also ensures that all entries point to valid noderevs.
2264   */
2265  switch (dirent->kind) {
2266  case svn_node_dir:
2267    SVN_ERR(svn_fs_is_dir(&right_kind, db->edit_baton->fs_root, path, pool));
2268    if (!right_kind)
2269      return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
2270                               _("Node '%s' is not a directory."),
2271                               path);
2272
2273    break;
2274  case svn_node_file:
2275    SVN_ERR(svn_fs_is_file(&right_kind, db->edit_baton->fs_root, path, pool));
2276    if (!right_kind)
2277      return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
2278                               _("Node '%s' is not a file."),
2279                               path);
2280    break;
2281  default:
2282    return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
2283                             _("Unexpected node kind %d for '%s'"),
2284                             dirent->kind, path);
2285  }
2286
2287  return SVN_NO_ERROR;
2288}
2289
/* Baton used by the check_name_collision hash iterator. */
struct check_name_collision_baton
{
  /* The directory whose entries are being scanned. */
  struct dir_baton *dir_baton;

  /* Maps each normalized entry name seen so far to a marker telling
     whether it is unique or a collision that was already reported. */
  apr_hash_t *normalized;

  /* Work buffer handed to svn_utf__normalize(); its pool is also used
     for the keys stored in NORMALIZED. */
  svn_membuf_t buffer;
};
2297
2298/* Scan the directory and report all entry names that differ only in
2299   Unicode character representation. */
2300static svn_error_t *
2301check_name_collision(void *baton, const void *key, apr_ssize_t klen,
2302                     void *val, apr_pool_t *iterpool)
2303{
2304  struct check_name_collision_baton *const cb = baton;
2305  const char *name;
2306  const char *found;
2307
2308  SVN_ERR(svn_utf__normalize(&name, key, klen, &cb->buffer));
2309
2310  found = svn_hash_gets(cb->normalized, name);
2311  if (!found)
2312    svn_hash_sets(cb->normalized, apr_pstrdup(cb->buffer.pool, name),
2313                  normalized_unique);
2314  else if (found == normalized_collision)
2315    /* Skip already reported collision */;
2316  else
2317    {
2318      struct dir_baton *const db = cb->dir_baton;
2319      struct edit_baton *const eb = db->edit_baton;
2320      const char* normpath;
2321
2322      svn_hash_sets(cb->normalized, apr_pstrdup(cb->buffer.pool, name),
2323                    normalized_collision);
2324
2325      SVN_ERR(svn_utf__normalize(
2326                  &normpath, svn_relpath_join(db->path, name, iterpool),
2327                  SVN_UTF__UNKNOWN_LENGTH, &cb->buffer));
2328      notify_warning(iterpool, eb->notify_func, eb->notify_baton,
2329                     svn_repos_notify_warning_name_collision,
2330                     _("Duplicate representation of path '%s'"), normpath);
2331    }
2332  return SVN_NO_ERROR;
2333}
2334
2335
2336static svn_error_t *
2337verify_close_directory(void *dir_baton, apr_pool_t *pool)
2338{
2339  struct dir_baton *db = dir_baton;
2340  apr_hash_t *dirents;
2341  SVN_ERR(svn_fs_dir_entries(&dirents, db->edit_baton->fs_root,
2342                             db->path, pool));
2343  SVN_ERR(svn_iter_apr_hash(NULL, dirents, verify_directory_entry,
2344                            dir_baton, pool));
2345
2346  if (db->check_name_collision)
2347    {
2348      struct check_name_collision_baton check_baton;
2349      check_baton.dir_baton = db;
2350      check_baton.normalized = apr_hash_make(pool);
2351      svn_membuf__create(&check_baton.buffer, 0, pool);
2352      SVN_ERR(svn_iter_apr_hash(NULL, dirents, check_name_collision,
2353                                &check_baton, pool));
2354    }
2355
2356  return close_directory(dir_baton, pool);
2357}
2358
2359/* Verify revision REV in file system FS. */
2360static svn_error_t *
2361verify_one_revision(svn_fs_t *fs,
2362                    svn_revnum_t rev,
2363                    svn_repos_notify_func_t notify_func,
2364                    void *notify_baton,
2365                    svn_revnum_t start_rev,
2366                    svn_boolean_t check_normalization,
2367                    svn_cancel_func_t cancel_func,
2368                    void *cancel_baton,
2369                    apr_pool_t *scratch_pool)
2370{
2371  const svn_delta_editor_t *dump_editor;
2372  void *dump_edit_baton;
2373  svn_fs_root_t *to_root;
2374  apr_hash_t *props;
2375  const svn_delta_editor_t *cancel_editor;
2376  void *cancel_edit_baton;
2377
2378  /* Get cancellable dump editor, but with our close_directory handler.*/
2379  SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton,
2380                          fs, rev, "",
2381                          svn_stream_empty(scratch_pool),
2382                          NULL, NULL,
2383                          verify_close_directory,
2384                          notify_func, notify_baton,
2385                          start_rev,
2386                          FALSE, TRUE, /* use_deltas, verify */
2387                          check_normalization,
2388                          scratch_pool));
2389  SVN_ERR(svn_delta_get_cancellation_editor(cancel_func, cancel_baton,
2390                                            dump_editor, dump_edit_baton,
2391                                            &cancel_editor,
2392                                            &cancel_edit_baton,
2393                                            scratch_pool));
2394  SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, scratch_pool));
2395  SVN_ERR(svn_fs_verify_root(to_root, scratch_pool));
2396  SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE,
2397                            cancel_editor, cancel_edit_baton,
2398                            NULL, NULL, scratch_pool));
2399
2400  /* While our editor close_edit implementation is a no-op, we still
2401     do this for completeness. */
2402  SVN_ERR(cancel_editor->close_edit(cancel_edit_baton, scratch_pool));
2403
2404  SVN_ERR(svn_fs_revision_proplist2(&props, fs, rev, FALSE, scratch_pool,
2405                                    scratch_pool));
2406
2407  return SVN_NO_ERROR;
2408}
2409
/* Baton type used for forwarding notifications from FS API to REPOS API.
   It is filled in by the verification entry point and consumed by
   verify_fs_notify_func(). */
struct verify_fs_notify_func_baton_t
{
   /* notification function to call (must not be NULL) */
   svn_repos_notify_func_t notify_func;

   /* baton to use for it */
   void *notify_baton;

   /* type of notification to send (we will simply plug in the revision);
      the same object is reused for every forwarded notification */
   svn_repos_notify_t *notify;
};
2422
2423/* Forward the notification to BATON. */
2424static void
2425verify_fs_notify_func(svn_revnum_t revision,
2426                       void *baton,
2427                       apr_pool_t *pool)
2428{
2429  struct verify_fs_notify_func_baton_t *notify_baton = baton;
2430
2431  notify_baton->notify->revision = revision;
2432  notify_baton->notify_func(notify_baton->notify_baton,
2433                            notify_baton->notify, pool);
2434}
2435
2436static svn_error_t *
2437report_error(svn_revnum_t revision,
2438             svn_error_t *verify_err,
2439             svn_repos_verify_callback_t verify_callback,
2440             void *verify_baton,
2441             apr_pool_t *pool)
2442{
2443  if (verify_callback)
2444    {
2445      svn_error_t *cb_err;
2446
2447      /* The caller provided us with a callback, so make him responsible
2448         for what's going to happen with the error. */
2449      cb_err = verify_callback(verify_baton, revision, verify_err, pool);
2450      svn_error_clear(verify_err);
2451      SVN_ERR(cb_err);
2452
2453      return SVN_NO_ERROR;
2454    }
2455  else
2456    {
2457      /* No callback -- no second guessing.  Just return the error. */
2458      return svn_error_trace(verify_err);
2459    }
2460}
2461
2462svn_error_t *
2463svn_repos_verify_fs3(svn_repos_t *repos,
2464                     svn_revnum_t start_rev,
2465                     svn_revnum_t end_rev,
2466                     svn_boolean_t check_normalization,
2467                     svn_boolean_t metadata_only,
2468                     svn_repos_notify_func_t notify_func,
2469                     void *notify_baton,
2470                     svn_repos_verify_callback_t verify_callback,
2471                     void *verify_baton,
2472                     svn_cancel_func_t cancel_func,
2473                     void *cancel_baton,
2474                     apr_pool_t *pool)
2475{
2476  svn_fs_t *fs = svn_repos_fs(repos);
2477  svn_revnum_t youngest;
2478  svn_revnum_t rev;
2479  apr_pool_t *iterpool = svn_pool_create(pool);
2480  svn_repos_notify_t *notify;
2481  svn_fs_progress_notify_func_t verify_notify = NULL;
2482  struct verify_fs_notify_func_baton_t *verify_notify_baton = NULL;
2483  svn_error_t *err;
2484
2485  /* Make sure we catch up on the latest revprop changes.  This is the only
2486   * time we will refresh the revprop data in this query. */
2487  SVN_ERR(svn_fs_refresh_revision_props(fs, pool));
2488
2489  /* Determine the current youngest revision of the filesystem. */
2490  SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool));
2491
2492  /* Use default vals if necessary. */
2493  if (! SVN_IS_VALID_REVNUM(start_rev))
2494    start_rev = 0;
2495  if (! SVN_IS_VALID_REVNUM(end_rev))
2496    end_rev = youngest;
2497
2498  /* Validate the revisions. */
2499  if (start_rev > end_rev)
2500    return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
2501                             _("Start revision %ld"
2502                               " is greater than end revision %ld"),
2503                             start_rev, end_rev);
2504  if (end_rev > youngest)
2505    return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
2506                             _("End revision %ld is invalid "
2507                               "(youngest revision is %ld)"),
2508                             end_rev, youngest);
2509
2510  /* Create a notify object that we can reuse within the loop and a
2511     forwarding structure for notifications from inside svn_fs_verify(). */
2512  if (notify_func)
2513    {
2514      notify = svn_repos_notify_create(svn_repos_notify_verify_rev_end, pool);
2515
2516      verify_notify = verify_fs_notify_func;
2517      verify_notify_baton = apr_palloc(pool, sizeof(*verify_notify_baton));
2518      verify_notify_baton->notify_func = notify_func;
2519      verify_notify_baton->notify_baton = notify_baton;
2520      verify_notify_baton->notify
2521        = svn_repos_notify_create(svn_repos_notify_verify_rev_structure, pool);
2522    }
2523
2524  /* Verify global metadata and backend-specific data first. */
2525  err = svn_fs_verify(svn_fs_path(fs, pool), svn_fs_config(fs, pool),
2526                      start_rev, end_rev,
2527                      verify_notify, verify_notify_baton,
2528                      cancel_func, cancel_baton, pool);
2529
2530  if (err && err->apr_err == SVN_ERR_CANCELLED)
2531    {
2532      return svn_error_trace(err);
2533    }
2534  else if (err)
2535    {
2536      SVN_ERR(report_error(SVN_INVALID_REVNUM, err, verify_callback,
2537                           verify_baton, iterpool));
2538    }
2539
2540  if (!metadata_only)
2541    for (rev = start_rev; rev <= end_rev; rev++)
2542      {
2543        svn_pool_clear(iterpool);
2544
2545        /* Wrapper function to catch the possible errors. */
2546        err = verify_one_revision(fs, rev, notify_func, notify_baton,
2547                                  start_rev, check_normalization,
2548                                  cancel_func, cancel_baton,
2549                                  iterpool);
2550
2551        if (err && err->apr_err == SVN_ERR_CANCELLED)
2552          {
2553            return svn_error_trace(err);
2554          }
2555        else if (err)
2556          {
2557            SVN_ERR(report_error(rev, err, verify_callback, verify_baton,
2558                                 iterpool));
2559          }
2560        else if (notify_func)
2561          {
2562            /* Tell the caller that we're done with this revision. */
2563            notify->revision = rev;
2564            notify_func(notify_baton, notify, iterpool);
2565          }
2566      }
2567
2568  /* We're done. */
2569  if (notify_func)
2570    {
2571      notify = svn_repos_notify_create(svn_repos_notify_verify_end, iterpool);
2572      notify_func(notify_baton, notify, iterpool);
2573    }
2574
2575  svn_pool_destroy(iterpool);
2576
2577  return SVN_NO_ERROR;
2578}
2579