svndumpfilter.c revision 299742
1/*
2 * svndumpfilter.c: Subversion dump stream filtering tool main file.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <stdlib.h>
26
27#include <apr_file_io.h>
28
29#include "svn_private_config.h"
30#include "svn_cmdline.h"
31#include "svn_error.h"
32#include "svn_string.h"
33#include "svn_opt.h"
34#include "svn_utf.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_hash.h"
38#include "svn_repos.h"
39#include "svn_fs.h"
40#include "svn_pools.h"
41#include "svn_sorts.h"
42#include "svn_props.h"
43#include "svn_mergeinfo.h"
44#include "svn_version.h"
45
46#include "private/svn_repos_private.h"
47#include "private/svn_mergeinfo_private.h"
48#include "private/svn_cmdline_private.h"
49#include "private/svn_sorts_private.h"
50
51#ifdef _WIN32
52typedef apr_status_t (__stdcall *open_fn_t)(apr_file_t **, apr_pool_t *);
53#else
54typedef apr_status_t (*open_fn_t)(apr_file_t **, apr_pool_t *);
55#endif
56
57/*** Code. ***/
58
59/* Helper to open stdio streams */
60
61/* NOTE: we used to call svn_stream_from_stdio(), which wraps a stream
62   around a standard stdio.h FILE pointer.  The problem is that these
63   pointers operate through C Run Time (CRT) on Win32, which does all
64   sorts of translation on them: LF's become CRLF's, and ctrl-Z's
65   embedded in Word documents are interpreted as premature EOF's.
66
67   So instead, we use apr_file_open_std*, which bypass the CRT and
68   directly wrap the OS's file-handles, which don't know or care about
69   translation.  Thus dump/load works correctly on Win32.
70*/
71static svn_error_t *
72create_stdio_stream(svn_stream_t **stream,
73                    open_fn_t open_fn,
74                    apr_pool_t *pool)
75{
76  apr_file_t *stdio_file;
77  apr_status_t apr_err = open_fn(&stdio_file, pool);
78
79  if (apr_err)
80    return svn_error_wrap_apr(apr_err, _("Can't open stdio file"));
81
82  *stream = svn_stream_from_aprfile2(stdio_file, TRUE, pool);
83  return SVN_NO_ERROR;
84}
85
86
87/* Writes a property in dumpfile format to given stringbuf. */
88static void
89write_prop_to_stringbuf(svn_stringbuf_t *strbuf,
90                        const char *name,
91                        const svn_string_t *value)
92{
93  int bytes_used;
94  size_t namelen;
95  char buf[SVN_KEYLINE_MAXLEN];
96
97  /* Output name length, then name. */
98  namelen = strlen(name);
99  svn_stringbuf_appendbytes(strbuf, "K ", 2);
100
101  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
102  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
103  svn_stringbuf_appendbyte(strbuf, '\n');
104
105  svn_stringbuf_appendbytes(strbuf, name, namelen);
106  svn_stringbuf_appendbyte(strbuf, '\n');
107
108  /* Output value length, then value. */
109  svn_stringbuf_appendbytes(strbuf, "V ", 2);
110
111  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, value->len);
112  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
113  svn_stringbuf_appendbyte(strbuf, '\n');
114
115  svn_stringbuf_appendbytes(strbuf, value->data, value->len);
116  svn_stringbuf_appendbyte(strbuf, '\n');
117}
118
119
120/* Writes a property deletion in dumpfile format to given stringbuf. */
121static void
122write_propdel_to_stringbuf(svn_stringbuf_t **strbuf,
123                           const char *name)
124{
125  int bytes_used;
126  size_t namelen;
127  char buf[SVN_KEYLINE_MAXLEN];
128
129  /* Output name length, then name. */
130  namelen = strlen(name);
131  svn_stringbuf_appendbytes(*strbuf, "D ", 2);
132
133  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
134  svn_stringbuf_appendbytes(*strbuf, buf, bytes_used);
135  svn_stringbuf_appendbyte(*strbuf, '\n');
136
137  svn_stringbuf_appendbytes(*strbuf, name, namelen);
138  svn_stringbuf_appendbyte(*strbuf, '\n');
139}
140
141
142/* Compare the node-path PATH with the (const char *) prefixes in PFXLIST.
143 * Return TRUE if any prefix is a prefix of PATH (matching whole path
144 * components); FALSE otherwise.
145 * PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
146static svn_boolean_t
147ary_prefix_match(const apr_array_header_t *pfxlist, const char *path)
148{
149  int i;
150  size_t path_len = strlen(path);
151
152  for (i = 0; i < pfxlist->nelts; i++)
153    {
154      const char *pfx = APR_ARRAY_IDX(pfxlist, i, const char *);
155      size_t pfx_len = strlen(pfx);
156
157      if (path_len < pfx_len)
158        continue;
159      if (strncmp(path, pfx, pfx_len) == 0
160          && (pfx_len == 1 || path[pfx_len] == '\0' || path[pfx_len] == '/'))
161        return TRUE;
162    }
163
164  return FALSE;
165}
166
167
168/* Check whether we need to skip this PATH based on its presence in
169   the PREFIXES list, and the DO_EXCLUDE option.
170   PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
171static APR_INLINE svn_boolean_t
172skip_path(const char *path, const apr_array_header_t *prefixes,
173          svn_boolean_t do_exclude, svn_boolean_t glob)
174{
175  const svn_boolean_t matches =
176    (glob
177     ? svn_cstring_match_glob_list(path, prefixes)
178     : ary_prefix_match(prefixes, path));
179
180  /* NXOR */
181  return (matches ? do_exclude : !do_exclude);
182}
183
184
185
186/* Note: the input stream parser calls us with events.
187   Output of the filtered dump occurs for the most part streamily with the
188   event callbacks, to avoid caching large quantities of data in memory.
189   The exceptions to this are:
190   - All revision data (headers and props) must be cached until a non-skipped
191     node within the revision is found, or the revision is closed.
192   - Node headers and props must be cached until all props have been received
193     (to allow the Prop-content-length to be found). This is signalled either
194     by the node text arriving, or the node being closed.
195   The writing_begun members of the associated object batons track the state.
196   output_revision() and output_node() are called to cause this flushing of
197   cached data to occur.
198*/
199
200
201/* Filtering batons */
202
203struct revmap_t
204{
205  svn_revnum_t rev; /* Last non-dropped revision to which this maps. */
206  svn_boolean_t was_dropped; /* Was this revision dropped? */
207};
208
209struct parse_baton_t
210{
211  /* Command-line options values. */
212  svn_boolean_t do_exclude;
213  svn_boolean_t quiet;
214  svn_boolean_t glob;
215  svn_boolean_t drop_empty_revs;
216  svn_boolean_t drop_all_empty_revs;
217  svn_boolean_t do_renumber_revs;
218  svn_boolean_t preserve_revprops;
219  svn_boolean_t skip_missing_merge_sources;
220  svn_boolean_t allow_deltas;
221  apr_array_header_t *prefixes;
222
223  /* Input and output streams. */
224  svn_stream_t *in_stream;
225  svn_stream_t *out_stream;
226
227  /* State for the filtering process. */
228  apr_int32_t rev_drop_count;
229  apr_hash_t *dropped_nodes;
230  apr_hash_t *renumber_history;  /* svn_revnum_t -> struct revmap_t */
231  svn_revnum_t last_live_revision;
232  /* The oldest original revision, greater than r0, in the input
233     stream which was not filtered. */
234  svn_revnum_t oldest_original_rev;
235};
236
237struct revision_baton_t
238{
239  /* Reference to the global parse baton. */
240  struct parse_baton_t *pb;
241
242  /* Does this revision have node or prop changes? */
243  svn_boolean_t has_nodes;
244
245  /* Did we drop any nodes? */
246  svn_boolean_t had_dropped_nodes;
247
248  /* Written to output stream? */
249  svn_boolean_t writing_begun;
250
251  /* The original and new (re-mapped) revision numbers. */
252  svn_revnum_t rev_orig;
253  svn_revnum_t rev_actual;
254
255  /* Pointers to dumpfile data. */
256  apr_hash_t *original_headers;
257  apr_hash_t *props;
258};
259
260struct node_baton_t
261{
262  /* Reference to the current revision baton. */
263  struct revision_baton_t *rb;
264
265  /* Are we skipping this node? */
266  svn_boolean_t do_skip;
267
268  /* Have we been instructed to change or remove props on, or change
269     the text of, this node? */
270  svn_boolean_t has_props;
271  svn_boolean_t has_text;
272
273  /* Written to output stream? */
274  svn_boolean_t writing_begun;
275
276  /* The text content length according to the dumpfile headers, because we
277     need the length before we have the actual text. */
278  svn_filesize_t tcl;
279
280  /* Pointers to dumpfile data. */
281  svn_repos__dumpfile_headers_t *headers;
282  svn_stringbuf_t *props;
283
284  /* Expect deltas? */
285  svn_boolean_t has_prop_delta;
286  svn_boolean_t has_text_delta;
287
288  /* We might need the node path in a parse error message. */
289  char *node_path;
290
291  apr_pool_t *node_pool;
292};
293
294
295
296/* Filtering vtable members */
297
298/* File-format stamp. */
299static svn_error_t *
300magic_header_record(int version, void *parse_baton, apr_pool_t *pool)
301{
302  struct parse_baton_t *pb = parse_baton;
303
304  if (version >= SVN_REPOS_DUMPFILE_FORMAT_VERSION_DELTAS)
305    pb->allow_deltas = TRUE;
306
307  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
308                            SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n",
309                            version));
310
311  return SVN_NO_ERROR;
312}
313
314
315/* Return a deep copy of a (char * -> char *) hash. */
316static apr_hash_t *
317headers_dup(apr_hash_t *headers,
318            apr_pool_t *pool)
319{
320  apr_hash_t *new_hash = apr_hash_make(pool);
321  apr_hash_index_t *hi;
322
323  for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
324    {
325      const char *key = apr_hash_this_key(hi);
326      const char *val = apr_hash_this_val(hi);
327
328      svn_hash_sets(new_hash, apr_pstrdup(pool, key), apr_pstrdup(pool, val));
329    }
330  return new_hash;
331}
332
333/* New revision: set up revision_baton, decide if we skip it. */
334static svn_error_t *
335new_revision_record(void **revision_baton,
336                    apr_hash_t *headers,
337                    void *parse_baton,
338                    apr_pool_t *pool)
339{
340  struct revision_baton_t *rb;
341  const char *rev_orig;
342
343  *revision_baton = apr_palloc(pool, sizeof(struct revision_baton_t));
344  rb = *revision_baton;
345  rb->pb = parse_baton;
346  rb->has_nodes = FALSE;
347  rb->had_dropped_nodes = FALSE;
348  rb->writing_begun = FALSE;
349  rb->props = apr_hash_make(pool);
350  rb->original_headers = headers_dup(headers, pool);
351
352  rev_orig = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER);
353  rb->rev_orig = SVN_STR_TO_REV(rev_orig);
354
355  if (rb->pb->do_renumber_revs)
356    rb->rev_actual = rb->rev_orig - rb->pb->rev_drop_count;
357  else
358    rb->rev_actual = rb->rev_orig;
359
360  return SVN_NO_ERROR;
361}
362
363
364/* Output revision to dumpstream
365   This may be called by new_node_record(), iff rb->has_nodes has been set
366   to TRUE, or by close_revision() otherwise. This must only be called
367   if rb->writing_begun is FALSE. */
368static svn_error_t *
369output_revision(struct revision_baton_t *rb)
370{
371  svn_boolean_t write_out_rev = FALSE;
372  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
373  apr_pool_t *subpool = svn_pool_create(hash_pool);
374
375  rb->writing_begun = TRUE;
376
377  /* If this revision has no nodes left because the ones it had were
378     dropped, and we are not dropping empty revisions, and we were not
379     told to preserve revision props, then we want to fixup the
380     revision props to only contain:
381       - the date
382       - a log message that reports that this revision is just stuffing. */
383  if ((! rb->pb->preserve_revprops)
384      && (! rb->has_nodes)
385      && rb->had_dropped_nodes
386      && (! rb->pb->drop_empty_revs)
387      && (! rb->pb->drop_all_empty_revs))
388    {
389      apr_hash_t *old_props = rb->props;
390      rb->props = apr_hash_make(hash_pool);
391      svn_hash_sets(rb->props, SVN_PROP_REVISION_DATE,
392                    svn_hash_gets(old_props, SVN_PROP_REVISION_DATE));
393      svn_hash_sets(rb->props, SVN_PROP_REVISION_LOG,
394                    svn_string_create(_("This is an empty revision for "
395                                        "padding."), hash_pool));
396    }
397
398  /* write out the revision */
399  /* Revision is written out in the following cases:
400     1. If the revision has nodes or
401     it is revision 0 (Special case: To preserve the props on r0).
402     2. --drop-empty-revs has been supplied,
403     but revision has not all nodes dropped.
404     3. If no --drop-empty-revs or --drop-all-empty-revs have been supplied,
405     write out the revision which has no nodes to begin with.
406  */
407  if (rb->has_nodes || (rb->rev_orig == 0))
408    write_out_rev = TRUE;
409  else if (rb->pb->drop_empty_revs)
410    write_out_rev = ! rb->had_dropped_nodes;
411  else if (! rb->pb->drop_all_empty_revs)
412    write_out_rev = TRUE;
413
414  if (write_out_rev)
415    {
416      /* This revision is a keeper. */
417      SVN_ERR(svn_repos__dump_revision_record(rb->pb->out_stream,
418                                              rb->rev_actual,
419                                              rb->original_headers,
420                                              rb->props,
421                                              FALSE /*props_section_always*/,
422                                              subpool));
423
424      /* Stash the oldest original rev not dropped. */
425      if (rb->rev_orig > 0
426          && !SVN_IS_VALID_REVNUM(rb->pb->oldest_original_rev))
427        rb->pb->oldest_original_rev = rb->rev_orig;
428
429      if (rb->pb->do_renumber_revs)
430        {
431          svn_revnum_t *rr_key;
432          struct revmap_t *rr_val;
433          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
434          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
435          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
436          *rr_key = rb->rev_orig;
437          rr_val->rev = rb->rev_actual;
438          rr_val->was_dropped = FALSE;
439          apr_hash_set(rb->pb->renumber_history, rr_key,
440                       sizeof(*rr_key), rr_val);
441          rb->pb->last_live_revision = rb->rev_actual;
442        }
443
444      if (! rb->pb->quiet)
445        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
446                                    _("Revision %ld committed as %ld.\n"),
447                                    rb->rev_orig, rb->rev_actual));
448    }
449  else
450    {
451      /* We're dropping this revision. */
452      rb->pb->rev_drop_count++;
453      if (rb->pb->do_renumber_revs)
454        {
455          svn_revnum_t *rr_key;
456          struct revmap_t *rr_val;
457          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
458          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
459          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
460          *rr_key = rb->rev_orig;
461          rr_val->rev = rb->pb->last_live_revision;
462          rr_val->was_dropped = TRUE;
463          apr_hash_set(rb->pb->renumber_history, rr_key,
464                       sizeof(*rr_key), rr_val);
465        }
466
467      if (! rb->pb->quiet)
468        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
469                                    _("Revision %ld skipped.\n"),
470                                    rb->rev_orig));
471    }
472  svn_pool_destroy(subpool);
473  return SVN_NO_ERROR;
474}
475
476
477/* UUID record here: dump it, as we do not filter them. */
478static svn_error_t *
479uuid_record(const char *uuid, void *parse_baton, apr_pool_t *pool)
480{
481  struct parse_baton_t *pb = parse_baton;
482  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
483                            SVN_REPOS_DUMPFILE_UUID ": %s\n\n", uuid));
484  return SVN_NO_ERROR;
485}
486
487
488/* New node here. Set up node_baton by copying headers. */
489static svn_error_t *
490new_node_record(void **node_baton,
491                apr_hash_t *headers,
492                void *rev_baton,
493                apr_pool_t *pool)
494{
495  struct parse_baton_t *pb;
496  struct node_baton_t *nb;
497  char *node_path, *copyfrom_path;
498  apr_hash_index_t *hi;
499  const char *tcl;
500
501  *node_baton = apr_palloc(pool, sizeof(struct node_baton_t));
502  nb          = *node_baton;
503  nb->rb      = rev_baton;
504  nb->node_pool = pool;
505  pb          = nb->rb->pb;
506
507  node_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH);
508  copyfrom_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH);
509
510  /* Ensure that paths start with a leading '/'. */
511  if (node_path[0] != '/')
512    node_path = apr_pstrcat(pool, "/", node_path, SVN_VA_NULL);
513  if (copyfrom_path && copyfrom_path[0] != '/')
514    copyfrom_path = apr_pstrcat(pool, "/", copyfrom_path, SVN_VA_NULL);
515
516  nb->do_skip = skip_path(node_path, pb->prefixes,
517                          pb->do_exclude, pb->glob);
518
519  /* If we're skipping the node, take note of path, discarding the
520     rest.  */
521  if (nb->do_skip)
522    {
523      svn_hash_sets(pb->dropped_nodes,
524                    apr_pstrdup(apr_hash_pool_get(pb->dropped_nodes),
525                                node_path),
526                    (void *)1);
527      nb->rb->had_dropped_nodes = TRUE;
528    }
529  else
530    {
531      const char *kind;
532      const char *action;
533
534      tcl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
535
536      /* Test if this node was copied from dropped source. */
537      if (copyfrom_path &&
538          skip_path(copyfrom_path, pb->prefixes, pb->do_exclude, pb->glob))
539        {
540          /* This node was copied from a dropped source.
541             We have a problem, since we did not want to drop this node too.
542
543             However, there is one special case we'll handle.  If the node is
544             a file, and this was a copy-and-modify operation, then the
545             dumpfile should contain the new contents of the file.  In this
546             scenario, we'll just do an add without history using the new
547             contents.  */
548          kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
549
550          /* If there is a Text-content-length header, and the kind is
551             "file", we just fallback to an add without history. */
552          if (tcl && (strcmp(kind, "file") == 0))
553            {
554              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH,
555                            NULL);
556              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
557                            NULL);
558              copyfrom_path = NULL;
559            }
560          /* Else, this is either a directory or a file whose contents we
561             don't have readily available.  */
562          else
563            {
564              return svn_error_createf
565                (SVN_ERR_INCOMPLETE_DATA, 0,
566                 _("Invalid copy source path '%s'"), copyfrom_path);
567            }
568        }
569
570      nb->has_props = FALSE;
571      nb->has_text = FALSE;
572      nb->has_prop_delta = FALSE;
573      nb->has_text_delta = FALSE;
574      nb->writing_begun = FALSE;
575      nb->tcl = tcl ? svn__atoui64(tcl) : 0;
576      nb->headers = svn_repos__dumpfile_headers_create(pool);
577      nb->props = svn_stringbuf_create_empty(pool);
578      nb->node_path = apr_pstrdup(pool, node_path);
579
580      /* Now we know for sure that we have a node that will not be
581         skipped, flush the revision if it has not already been done. */
582      nb->rb->has_nodes = TRUE;
583      if (! nb->rb->writing_begun)
584        SVN_ERR(output_revision(nb->rb));
585
586      /* A node record is required to begin with 'Node-path', skip the
587         leading '/' to match the form used by 'svnadmin dump'. */
588      svn_repos__dumpfile_header_push(
589        nb->headers, SVN_REPOS_DUMPFILE_NODE_PATH, node_path + 1);
590
591      /* Node-kind is next and is optional. */
592      kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
593      if (kind)
594        svn_repos__dumpfile_header_push(
595          nb->headers, SVN_REPOS_DUMPFILE_NODE_KIND, kind);
596
597      /* Node-action is next and required. */
598      action = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_ACTION);
599      if (action)
600        svn_repos__dumpfile_header_push(
601          nb->headers, SVN_REPOS_DUMPFILE_NODE_ACTION, action);
602      else
603        return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
604                                 _("Missing Node-action for path '%s'"),
605                                 node_path);
606
607      for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
608        {
609          const char *key = apr_hash_this_key(hi);
610          const char *val = apr_hash_this_val(hi);
611
612          if ((!strcmp(key, SVN_REPOS_DUMPFILE_PROP_DELTA))
613              && (!strcmp(val, "true")))
614            nb->has_prop_delta = TRUE;
615
616          if ((!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_DELTA))
617              && (!strcmp(val, "true")))
618            nb->has_text_delta = TRUE;
619
620          if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
621              || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
622              || (!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH))
623              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_PATH))
624              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_KIND))
625              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_ACTION)))
626            continue;
627
628          /* Rewrite Node-Copyfrom-Rev if we are renumbering revisions.
629             The number points to some revision in the past. We keep track
630             of revision renumbering in an apr_hash, which maps original
631             revisions to new ones. Dropped revision are mapped to -1.
632             This should never happen here.
633          */
634          if (pb->do_renumber_revs
635              && (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV)))
636            {
637              svn_revnum_t cf_orig_rev;
638              struct revmap_t *cf_renum_val;
639
640              cf_orig_rev = SVN_STR_TO_REV(val);
641              cf_renum_val = apr_hash_get(pb->renumber_history,
642                                          &cf_orig_rev,
643                                          sizeof(svn_revnum_t));
644              if (! (cf_renum_val && SVN_IS_VALID_REVNUM(cf_renum_val->rev)))
645                return svn_error_createf
646                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
647                   _("No valid copyfrom revision in filtered stream"));
648              svn_repos__dumpfile_header_pushf(
649                nb->headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
650                "%ld", cf_renum_val->rev);
651              continue;
652            }
653
654          /* passthru: put header straight to output */
655          svn_repos__dumpfile_header_push(nb->headers, key, val);
656        }
657    }
658
659  return SVN_NO_ERROR;
660}
661
662
663/* Examine the mergeinfo in INITIAL_VAL, omitting missing merge
664   sources or renumbering revisions in rangelists as appropriate, and
665   return the (possibly new) mergeinfo in *FINAL_VAL (allocated from
666   POOL). */
667static svn_error_t *
668adjust_mergeinfo(svn_string_t **final_val, const svn_string_t *initial_val,
669                 struct revision_baton_t *rb, apr_pool_t *pool)
670{
671  apr_hash_t *mergeinfo;
672  apr_hash_t *final_mergeinfo = apr_hash_make(pool);
673  apr_hash_index_t *hi;
674  apr_pool_t *subpool = svn_pool_create(pool);
675
676  SVN_ERR(svn_mergeinfo_parse(&mergeinfo, initial_val->data, subpool));
677
678  /* Issue #3020: If we are skipping missing merge sources, then also
679     filter mergeinfo ranges as old or older than the oldest revision in the
680     dump stream.  Those older than the oldest obviously refer to history
681     outside of the dump stream.  The oldest rev itself is present in the
682     dump, but cannot be a valid merge source revision since it is the
683     start of all history.  E.g. if we dump -r100:400 then dumpfilter the
684     result with --skip-missing-merge-sources, any mergeinfo with revision
685     100 implies a change of -r99:100, but r99 is part of the history we
686     want filtered.
687
688     If the oldest rev is r0 then there is nothing to filter. */
689
690  /* ### This seems to cater only for use cases where the revisions being
691         processed are not following on from revisions that will already
692         exist in the destination repository. If the revisions being
693         processed do follow on, then we might want to keep the mergeinfo
694         that refers to those older revisions. */
695
696  if (rb->pb->skip_missing_merge_sources && rb->pb->oldest_original_rev > 0)
697    SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
698      &mergeinfo, mergeinfo,
699      rb->pb->oldest_original_rev, 0,
700      FALSE, subpool, subpool));
701
702  for (hi = apr_hash_first(subpool, mergeinfo); hi; hi = apr_hash_next(hi))
703    {
704      const char *merge_source = apr_hash_this_key(hi);
705      svn_rangelist_t *rangelist = apr_hash_this_val(hi);
706      struct parse_baton_t *pb = rb->pb;
707
708      /* Determine whether the merge_source is a part of the prefix. */
709      if (skip_path(merge_source, pb->prefixes, pb->do_exclude, pb->glob))
710        {
711          if (pb->skip_missing_merge_sources)
712            continue;
713          else
714            return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
715                                     _("Missing merge source path '%s'; try "
716                                       "with --skip-missing-merge-sources"),
717                                     merge_source);
718        }
719
720      /* Possibly renumber revisions in merge source's rangelist. */
721      if (pb->do_renumber_revs)
722        {
723          int i;
724
725          for (i = 0; i < rangelist->nelts; i++)
726            {
727              struct revmap_t *revmap_start;
728              struct revmap_t *revmap_end;
729              svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
730                                                       svn_merge_range_t *);
731
732              revmap_start = apr_hash_get(pb->renumber_history,
733                                          &range->start, sizeof(svn_revnum_t));
734              if (! (revmap_start && SVN_IS_VALID_REVNUM(revmap_start->rev)))
735                return svn_error_createf
736                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
737                   _("No valid revision range 'start' in filtered stream"));
738
739              revmap_end = apr_hash_get(pb->renumber_history,
740                                        &range->end, sizeof(svn_revnum_t));
741              if (! (revmap_end && SVN_IS_VALID_REVNUM(revmap_end->rev)))
742                return svn_error_createf
743                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
744                   _("No valid revision range 'end' in filtered stream"));
745
746              range->start = revmap_start->rev;
747              range->end = revmap_end->rev;
748            }
749        }
750      svn_hash_sets(final_mergeinfo, merge_source, rangelist);
751    }
752
753  SVN_ERR(svn_mergeinfo__canonicalize_ranges(final_mergeinfo, subpool));
754  SVN_ERR(svn_mergeinfo_to_string(final_val, final_mergeinfo, pool));
755  svn_pool_destroy(subpool);
756
757  return SVN_NO_ERROR;
758}
759
760
761static svn_error_t *
762set_revision_property(void *revision_baton,
763                      const char *name,
764                      const svn_string_t *value)
765{
766  struct revision_baton_t *rb = revision_baton;
767  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
768
769  svn_hash_sets(rb->props,
770                apr_pstrdup(hash_pool, name),
771                svn_string_dup(value, hash_pool));
772  return SVN_NO_ERROR;
773}
774
775
776static svn_error_t *
777set_node_property(void *node_baton,
778                  const char *name,
779                  const svn_string_t *value)
780{
781  struct node_baton_t *nb = node_baton;
782  struct revision_baton_t *rb = nb->rb;
783
784  if (nb->do_skip)
785    return SVN_NO_ERROR;
786
787  /* Try to detect if a delta-mode property occurs unexpectedly. HAS_PROPS
788     can be false here only if the parser didn't call remove_node_props(),
789     so this may indicate a bug rather than bad data. */
790  if (! (nb->has_props || nb->has_prop_delta))
791    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
792                             _("Delta property block detected, but deltas "
793                               "are not enabled for node '%s' in original "
794                               "revision %ld"),
795                             nb->node_path, rb->rev_orig);
796
797  if (strcmp(name, SVN_PROP_MERGEINFO) == 0)
798    {
799      svn_string_t *filtered_mergeinfo;  /* Avoid compiler warning. */
800      apr_pool_t *pool = apr_hash_pool_get(rb->props);
801      SVN_ERR(adjust_mergeinfo(&filtered_mergeinfo, value, rb, pool));
802      value = filtered_mergeinfo;
803    }
804
805  nb->has_props = TRUE;
806  write_prop_to_stringbuf(nb->props, name, value);
807
808  return SVN_NO_ERROR;
809}
810
811
812static svn_error_t *
813delete_node_property(void *node_baton, const char *name)
814{
815  struct node_baton_t *nb = node_baton;
816  struct revision_baton_t *rb = nb->rb;
817
818  if (nb->do_skip)
819    return SVN_NO_ERROR;
820
821  if (!nb->has_prop_delta)
822    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
823                             _("Delta property block detected, but deltas "
824                               "are not enabled for node '%s' in original "
825                               "revision %ld"),
826                             nb->node_path, rb->rev_orig);
827
828  nb->has_props = TRUE;
829  write_propdel_to_stringbuf(&(nb->props), name);
830
831  return SVN_NO_ERROR;
832}
833
834
835/* The parser calls this method if the node record has a non-delta
836 * property content section, before any calls to set_node_property().
837 * If the node record uses property deltas, this is not called.
838 */
839static svn_error_t *
840remove_node_props(void *node_baton)
841{
842  struct node_baton_t *nb = node_baton;
843
844  /* In this case, not actually indicating that the node *has* props,
845     rather that it has a property content section. */
846  nb->has_props = TRUE;
847
848  return SVN_NO_ERROR;
849}
850
851
852static svn_error_t *
853set_fulltext(svn_stream_t **stream, void *node_baton)
854{
855  struct node_baton_t *nb = node_baton;
856
857  if (!nb->do_skip)
858    {
859      nb->has_text = TRUE;
860      if (! nb->writing_begun)
861        {
862          nb->writing_begun = TRUE;
863          if (nb->has_props)
864            {
865              svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
866            }
867          SVN_ERR(svn_repos__dump_node_record(nb->rb->pb->out_stream,
868                                              nb->headers,
869                                              nb->has_props ? nb->props : NULL,
870                                              nb->has_text,
871                                              nb->tcl,
872                                              TRUE /*content_length_always*/,
873                                              nb->node_pool));
874        }
875      *stream = nb->rb->pb->out_stream;
876    }
877
878  return SVN_NO_ERROR;
879}
880
881
882/* Finalize node */
883static svn_error_t *
884close_node(void *node_baton)
885{
886  struct node_baton_t *nb = node_baton;
887  apr_size_t len = 2;
888
889  /* Get out of here if we can. */
890  if (nb->do_skip)
891    return SVN_NO_ERROR;
892
893  /* If the node was not flushed already to output its text, do it now. */
894  if (! nb->writing_begun)
895    {
896      nb->writing_begun = TRUE;
897      if (nb->has_props)
898        {
899          svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
900        }
901      SVN_ERR(svn_repos__dump_node_record(nb->rb->pb->out_stream,
902                                          nb->headers,
903                                          nb->has_props ? nb->props : NULL,
904                                          nb->has_text,
905                                          nb->tcl,
906                                          TRUE /*content_length_always*/,
907                                          nb->node_pool));
908    }
909
910  /* put an end to node. */
911  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream, "\n\n", &len));
912
913  return SVN_NO_ERROR;
914}
915
916
917/* Finalize revision */
918static svn_error_t *
919close_revision(void *revision_baton)
920{
921  struct revision_baton_t *rb = revision_baton;
922
923  /* If no node has yet flushed the revision, do it now. */
924  if (! rb->writing_begun)
925    return output_revision(rb);
926  else
927    return SVN_NO_ERROR;
928}
929
930
/* Filtering vtable: the set of parser callbacks handed to
 * svn_repos_parse_dumpstream3() by do_filter().  The initializer is
 * positional, so the order of the entries must match the member order
 * of svn_repos_parse_fns3_t.
 */
static svn_repos_parse_fns3_t filtering_vtable =
  {
    magic_header_record,
    uuid_record,
    new_revision_record,
    new_node_record,
    set_revision_property,
    set_node_property,
    delete_node_property,
    remove_node_props,
    set_fulltext,
    NULL,   /* no handler; presumably the apply_textdelta slot -- confirm
               against svn_repos_parse_fns3_t */
    close_node,
    close_revision
  };
947
948
949
950/** Subcommands. **/
951
952static svn_opt_subcommand_t
953  subcommand_help,
954  subcommand_exclude,
955  subcommand_include;
956
/* Option codes for the options that have no single-character form.
 * Numbering starts at SVN_OPT_FIRST_LONGOPT_ID.  Each code is bound to
 * its option name in options_table and handled in sub_main().
 */
enum
  {
    svndumpfilter__drop_empty_revs = SVN_OPT_FIRST_LONGOPT_ID,
                                               /* --drop-empty-revs */
    svndumpfilter__drop_all_empty_revs,        /* --drop-all-empty-revs */
    svndumpfilter__renumber_revs,              /* --renumber-revs */
    svndumpfilter__preserve_revprops,          /* --preserve-revprops */
    svndumpfilter__skip_missing_merge_sources, /* --skip-missing-merge-sources */
    svndumpfilter__targets,                    /* --targets ARG */
    svndumpfilter__quiet,                      /* --quiet */
    svndumpfilter__glob,                       /* --pattern */
    svndumpfilter__version                     /* --version */
  };
969
/* Option codes and descriptions.
 *
 * Each entry lists, in order: the long option name (NULL when there is
 * only a short form), the option code, a flag that is non-zero when the
 * option takes an argument, and the help text.  Only "targets" takes an
 * argument here.
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const apr_getopt_option_t options_table[] =
  {
    {"help",          'h', 0,
     N_("show help on a subcommand")},

    {NULL,            '?', 0,
     N_("show help on a subcommand")},

    {"version",            svndumpfilter__version, 0,
     N_("show program version information") },
    {"quiet",              svndumpfilter__quiet, 0,
     N_("Do not display filtering statistics.") },
    {"pattern",            svndumpfilter__glob, 0,
     N_("Treat the path prefixes as file glob patterns.") },
    {"drop-empty-revs",    svndumpfilter__drop_empty_revs, 0,
     N_("Remove revisions emptied by filtering.")},
    {"drop-all-empty-revs",    svndumpfilter__drop_all_empty_revs, 0,
     N_("Remove all empty revisions found in dumpstream\n"
        "                             except revision 0.")},
    {"renumber-revs",      svndumpfilter__renumber_revs, 0,
     N_("Renumber revisions left after filtering.") },
    {"skip-missing-merge-sources",
     svndumpfilter__skip_missing_merge_sources, 0,
     N_("Skip missing merge sources.") },
    {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
     N_("Don't filter revision properties.") },
    {"targets", svndumpfilter__targets, 1,
     N_("Read additional prefixes, one per line, from\n"
        "                             file ARG.")},
    {NULL}
  };
1005
1006
/* Array of available subcommands.
 *
 * Each entry gives the subcommand name, the function implementing it,
 * its aliases, its help text, and the codes of the options it accepts.
 * sub_main() rejects any received option that is not in that last list
 * (apart from the help options, which every subcommand accepts).
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const svn_opt_subcommand_desc2_t cmd_table[] =
  {
    {"exclude", subcommand_exclude, {0},
     N_("Filter out nodes with given prefixes from dumpstream.\n"
        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"include", subcommand_include, {0},
     N_("Filter out nodes without given prefixes from dumpstream.\n"
        "usage: svndumpfilter include PATH_PREFIX...\n"),
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"help", subcommand_help, {"?", "h"},
     N_("Describe the usage of this program or its subcommands.\n"
        "usage: svndumpfilter help [SUBCOMMAND...]\n"),
     {0} },

    { NULL, NULL, {0}, NULL, {0} }
  };
1037
1038
/* Baton for passing option/argument state to a subcommand function.
 * It is filled in by sub_main() while parsing the command line. */
struct svndumpfilter_opt_state
{
  svn_opt_revision_t start_revision;     /* -r X[:Y] is         */
  svn_opt_revision_t end_revision;       /* not implemented.    */
  svn_boolean_t quiet;                   /* --quiet             */
  svn_boolean_t glob;                    /* --pattern           */
  svn_boolean_t version;                 /* --version           */
  svn_boolean_t drop_empty_revs;         /* --drop-empty-revs   */
  svn_boolean_t drop_all_empty_revs;     /* --drop-all-empty-revs */
  svn_boolean_t help;                    /* --help or -?        */
  svn_boolean_t renumber_revs;           /* --renumber-revs     */
  svn_boolean_t preserve_revprops;       /* --preserve-revprops */
  svn_boolean_t skip_missing_merge_sources;
                                         /* --skip-missing-merge-sources */
  const char *targets_file;              /* --targets ARG       */
  apr_array_header_t *prefixes;          /* mainargs, plus any prefixes
                                            read from the --targets file */
};
1057
1058
1059static svn_error_t *
1060parse_baton_initialize(struct parse_baton_t **pb,
1061                       struct svndumpfilter_opt_state *opt_state,
1062                       svn_boolean_t do_exclude,
1063                       apr_pool_t *pool)
1064{
1065  struct parse_baton_t *baton = apr_palloc(pool, sizeof(*baton));
1066
1067  /* Read the stream from STDIN.  Users can redirect a file. */
1068  SVN_ERR(create_stdio_stream(&(baton->in_stream),
1069                              apr_file_open_stdin, pool));
1070
1071  /* Have the parser dump results to STDOUT. Users can redirect a file. */
1072  SVN_ERR(create_stdio_stream(&(baton->out_stream),
1073                              apr_file_open_stdout, pool));
1074
1075  baton->do_exclude = do_exclude;
1076
1077  /* Ignore --renumber-revs if there can't possibly be
1078     anything to renumber. */
1079  baton->do_renumber_revs =
1080    (opt_state->renumber_revs && (opt_state->drop_empty_revs
1081                                  || opt_state->drop_all_empty_revs));
1082
1083  baton->drop_empty_revs = opt_state->drop_empty_revs;
1084  baton->drop_all_empty_revs = opt_state->drop_all_empty_revs;
1085  baton->preserve_revprops = opt_state->preserve_revprops;
1086  baton->quiet = opt_state->quiet;
1087  baton->glob = opt_state->glob;
1088  baton->prefixes = opt_state->prefixes;
1089  baton->skip_missing_merge_sources = opt_state->skip_missing_merge_sources;
1090  baton->rev_drop_count = 0; /* used to shift revnums while filtering */
1091  baton->dropped_nodes = apr_hash_make(pool);
1092  baton->renumber_history = apr_hash_make(pool);
1093  baton->last_live_revision = SVN_INVALID_REVNUM;
1094  baton->oldest_original_rev = SVN_INVALID_REVNUM;
1095  baton->allow_deltas = FALSE;
1096
1097  *pb = baton;
1098  return SVN_NO_ERROR;
1099}
1100
1101/* This implements `help` subcommand. */
1102static svn_error_t *
1103subcommand_help(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1104{
1105  struct svndumpfilter_opt_state *opt_state = baton;
1106  const char *header =
1107    _("general usage: svndumpfilter SUBCOMMAND [ARGS & OPTIONS ...]\n"
1108      "Subversion repository dump filtering tool.\n"
1109      "Type 'svndumpfilter help <subcommand>' for help on a "
1110      "specific subcommand.\n"
1111      "Type 'svndumpfilter --version' to see the program version.\n"
1112      "\n"
1113      "Available subcommands:\n");
1114
1115  SVN_ERR(svn_opt_print_help4(os, "svndumpfilter",
1116                              opt_state ? opt_state->version : FALSE,
1117                              opt_state ? opt_state->quiet : FALSE,
1118                              /*###opt_state ? opt_state->verbose :*/ FALSE,
1119                              NULL, header, cmd_table, options_table,
1120                              NULL, NULL, pool));
1121
1122  return SVN_NO_ERROR;
1123}
1124
1125
1126/* Version compatibility check */
1127static svn_error_t *
1128check_lib_versions(void)
1129{
1130  static const svn_version_checklist_t checklist[] =
1131    {
1132      { "svn_subr",  svn_subr_version },
1133      { "svn_repos", svn_repos_version },
1134      { "svn_delta", svn_delta_version },
1135      { NULL, NULL }
1136    };
1137  SVN_VERSION_DEFINE(my_version);
1138
1139  return svn_ver_check_list2(&my_version, checklist, svn_ver_equal);
1140}
1141
1142
1143/* Do the real work of filtering. */
1144static svn_error_t *
1145do_filter(apr_getopt_t *os,
1146          void *baton,
1147          svn_boolean_t do_exclude,
1148          apr_pool_t *pool)
1149{
1150  struct svndumpfilter_opt_state *opt_state = baton;
1151  struct parse_baton_t *pb;
1152  apr_hash_index_t *hi;
1153  apr_array_header_t *keys;
1154  int i, num_keys;
1155
1156  if (! opt_state->quiet)
1157    {
1158      apr_pool_t *subpool = svn_pool_create(pool);
1159
1160      if (opt_state->glob)
1161        {
1162          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1163                                      do_exclude
1164                                      ? (opt_state->drop_empty_revs
1165                                         || opt_state->drop_all_empty_revs)
1166                                        ? _("Excluding (and dropping empty "
1167                                            "revisions for) prefix patterns:\n")
1168                                        : _("Excluding prefix patterns:\n")
1169                                      : (opt_state->drop_empty_revs
1170                                         || opt_state->drop_all_empty_revs)
1171                                        ? _("Including (and dropping empty "
1172                                            "revisions for) prefix patterns:\n")
1173                                        : _("Including prefix patterns:\n")));
1174        }
1175      else
1176        {
1177          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1178                                      do_exclude
1179                                      ? (opt_state->drop_empty_revs
1180                                         || opt_state->drop_all_empty_revs)
1181                                        ? _("Excluding (and dropping empty "
1182                                            "revisions for) prefixes:\n")
1183                                        : _("Excluding prefixes:\n")
1184                                      : (opt_state->drop_empty_revs
1185                                         || opt_state->drop_all_empty_revs)
1186                                        ? _("Including (and dropping empty "
1187                                            "revisions for) prefixes:\n")
1188                                        : _("Including prefixes:\n")));
1189        }
1190
1191      for (i = 0; i < opt_state->prefixes->nelts; i++)
1192        {
1193          svn_pool_clear(subpool);
1194          SVN_ERR(svn_cmdline_fprintf
1195                  (stderr, subpool, "   '%s'\n",
1196                   APR_ARRAY_IDX(opt_state->prefixes, i, const char *)));
1197        }
1198
1199      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1200      svn_pool_destroy(subpool);
1201    }
1202
1203  SVN_ERR(parse_baton_initialize(&pb, opt_state, do_exclude, pool));
1204  SVN_ERR(svn_repos_parse_dumpstream3(pb->in_stream, &filtering_vtable, pb,
1205                                      TRUE, NULL, NULL, pool));
1206
1207  /* The rest of this is just reporting.  If we aren't reporting, get
1208     outta here. */
1209  if (opt_state->quiet)
1210    return SVN_NO_ERROR;
1211
1212  SVN_ERR(svn_cmdline_fputs("\n", stderr, pool));
1213
1214  if (pb->rev_drop_count)
1215    SVN_ERR(svn_cmdline_fprintf(stderr, pool,
1216                                Q_("Dropped %d revision.\n\n",
1217                                   "Dropped %d revisions.\n\n",
1218                                   pb->rev_drop_count),
1219                                pb->rev_drop_count));
1220
1221  if (pb->do_renumber_revs)
1222    {
1223      apr_pool_t *subpool = svn_pool_create(pool);
1224      SVN_ERR(svn_cmdline_fputs(_("Revisions renumbered as follows:\n"),
1225                                stderr, subpool));
1226
1227      /* Get the keys of the hash, sort them, then print the hash keys
1228         and values, sorted by keys. */
1229      num_keys = apr_hash_count(pb->renumber_history);
1230      keys = apr_array_make(pool, num_keys + 1, sizeof(svn_revnum_t));
1231      for (hi = apr_hash_first(pool, pb->renumber_history);
1232           hi;
1233           hi = apr_hash_next(hi))
1234        {
1235          const svn_revnum_t *revnum = apr_hash_this_key(hi);
1236
1237          APR_ARRAY_PUSH(keys, svn_revnum_t) = *revnum;
1238        }
1239      svn_sort__array(keys, svn_sort_compare_revisions);
1240      for (i = 0; i < keys->nelts; i++)
1241        {
1242          svn_revnum_t this_key;
1243          struct revmap_t *this_val;
1244
1245          svn_pool_clear(subpool);
1246          this_key = APR_ARRAY_IDX(keys, i, svn_revnum_t);
1247          this_val = apr_hash_get(pb->renumber_history, &this_key,
1248                                  sizeof(this_key));
1249          if (this_val->was_dropped)
1250            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1251                                        _("   %ld => (dropped)\n"),
1252                                        this_key));
1253          else
1254            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1255                                        "   %ld => %ld\n",
1256                                        this_key, this_val->rev));
1257        }
1258      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1259      svn_pool_destroy(subpool);
1260    }
1261
1262  if ((num_keys = apr_hash_count(pb->dropped_nodes)))
1263    {
1264      apr_pool_t *subpool = svn_pool_create(pool);
1265      SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1266                                  Q_("Dropped %d node:\n",
1267                                     "Dropped %d nodes:\n",
1268                                     num_keys),
1269                                  num_keys));
1270
1271      /* Get the keys of the hash, sort them, then print the hash keys
1272         and values, sorted by keys. */
1273      keys = apr_array_make(pool, num_keys + 1, sizeof(const char *));
1274      for (hi = apr_hash_first(pool, pb->dropped_nodes);
1275           hi;
1276           hi = apr_hash_next(hi))
1277        {
1278          const char *path = apr_hash_this_key(hi);
1279
1280          APR_ARRAY_PUSH(keys, const char *) = path;
1281        }
1282      svn_sort__array(keys, svn_sort_compare_paths);
1283      for (i = 0; i < keys->nelts; i++)
1284        {
1285          svn_pool_clear(subpool);
1286          SVN_ERR(svn_cmdline_fprintf
1287                  (stderr, subpool, "   '%s'\n",
1288                   (const char *)APR_ARRAY_IDX(keys, i, const char *)));
1289        }
1290      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1291      svn_pool_destroy(subpool);
1292    }
1293
1294  return SVN_NO_ERROR;
1295}
1296
1297/* This implements `exclude' subcommand. */
1298static svn_error_t *
1299subcommand_exclude(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1300{
1301  return do_filter(os, baton, TRUE, pool);
1302}
1303
1304
1305/* This implements `include` subcommand. */
1306static svn_error_t *
1307subcommand_include(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1308{
1309  return do_filter(os, baton, FALSE, pool);
1310}
1311
1312
1313
1314/** Main. **/
1315
1316/*
1317 * On success, leave *EXIT_CODE untouched and return SVN_NO_ERROR. On error,
1318 * either return an error to be displayed, or set *EXIT_CODE to non-zero and
1319 * return SVN_NO_ERROR.
1320 */
1321static svn_error_t *
1322sub_main(int *exit_code, int argc, const char *argv[], apr_pool_t *pool)
1323{
1324  svn_error_t *err;
1325  apr_status_t apr_err;
1326
1327  const svn_opt_subcommand_desc2_t *subcommand = NULL;
1328  struct svndumpfilter_opt_state opt_state;
1329  apr_getopt_t *os;
1330  int opt_id;
1331  apr_array_header_t *received_opts;
1332  int i;
1333
1334  /* Check library versions */
1335  SVN_ERR(check_lib_versions());
1336
1337  received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));
1338
1339  /* Initialize the FS library. */
1340  SVN_ERR(svn_fs_initialize(pool));
1341
1342  if (argc <= 1)
1343    {
1344      SVN_ERR(subcommand_help(NULL, NULL, pool));
1345      *exit_code = EXIT_FAILURE;
1346      return SVN_NO_ERROR;
1347    }
1348
1349  /* Initialize opt_state. */
1350  memset(&opt_state, 0, sizeof(opt_state));
1351  opt_state.start_revision.kind = svn_opt_revision_unspecified;
1352  opt_state.end_revision.kind = svn_opt_revision_unspecified;
1353
1354  /* Parse options. */
1355  SVN_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool));
1356
1357  os->interleave = 1;
1358  while (1)
1359    {
1360      const char *opt_arg;
1361
1362      /* Parse the next option. */
1363      apr_err = apr_getopt_long(os, options_table, &opt_id, &opt_arg);
1364      if (APR_STATUS_IS_EOF(apr_err))
1365        break;
1366      else if (apr_err)
1367        {
1368          SVN_ERR(subcommand_help(NULL, NULL, pool));
1369          *exit_code = EXIT_FAILURE;
1370          return SVN_NO_ERROR;
1371        }
1372
1373      /* Stash the option code in an array before parsing it. */
1374      APR_ARRAY_PUSH(received_opts, int) = opt_id;
1375
1376      switch (opt_id)
1377        {
1378        case 'h':
1379        case '?':
1380          opt_state.help = TRUE;
1381          break;
1382        case svndumpfilter__version:
1383          opt_state.version = TRUE;
1384          break;
1385        case svndumpfilter__quiet:
1386          opt_state.quiet = TRUE;
1387          break;
1388        case svndumpfilter__glob:
1389          opt_state.glob = TRUE;
1390          break;
1391        case svndumpfilter__drop_empty_revs:
1392          opt_state.drop_empty_revs = TRUE;
1393          break;
1394        case svndumpfilter__drop_all_empty_revs:
1395          opt_state.drop_all_empty_revs = TRUE;
1396          break;
1397        case svndumpfilter__renumber_revs:
1398          opt_state.renumber_revs = TRUE;
1399          break;
1400        case svndumpfilter__preserve_revprops:
1401          opt_state.preserve_revprops = TRUE;
1402          break;
1403        case svndumpfilter__skip_missing_merge_sources:
1404          opt_state.skip_missing_merge_sources = TRUE;
1405          break;
1406        case svndumpfilter__targets:
1407          opt_state.targets_file = opt_arg;
1408          break;
1409        default:
1410          {
1411            SVN_ERR(subcommand_help(NULL, NULL, pool));
1412            *exit_code = EXIT_FAILURE;
1413            return SVN_NO_ERROR;
1414          }
1415        }  /* close `switch' */
1416    }  /* close `while' */
1417
1418  /* Disallow simultaneous use of both --drop-empty-revs and
1419     --drop-all-empty-revs. */
1420  if (opt_state.drop_empty_revs && opt_state.drop_all_empty_revs)
1421    {
1422      return svn_error_create(SVN_ERR_CL_MUTUALLY_EXCLUSIVE_ARGS,
1423                              NULL,
1424                              _("--drop-empty-revs cannot be used with "
1425                                "--drop-all-empty-revs"));
1426    }
1427
1428  /* If the user asked for help, then the rest of the arguments are
1429     the names of subcommands to get help on (if any), or else they're
1430     just typos/mistakes.  Whatever the case, the subcommand to
1431     actually run is subcommand_help(). */
1432  if (opt_state.help)
1433    subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "help");
1434
1435  /* If we're not running the `help' subcommand, then look for a
1436     subcommand in the first argument. */
1437  if (subcommand == NULL)
1438    {
1439      if (os->ind >= os->argc)
1440        {
1441          if (opt_state.version)
1442            {
1443              /* Use the "help" subcommand to handle the "--version" option. */
1444              static const svn_opt_subcommand_desc2_t pseudo_cmd =
1445                { "--version", subcommand_help, {0}, "",
1446                  {svndumpfilter__version,  /* must accept its own option */
1447                   svndumpfilter__quiet,
1448                  } };
1449
1450              subcommand = &pseudo_cmd;
1451            }
1452          else
1453            {
1454              svn_error_clear(svn_cmdline_fprintf
1455                              (stderr, pool,
1456                               _("Subcommand argument required\n")));
1457              SVN_ERR(subcommand_help(NULL, NULL, pool));
1458              *exit_code = EXIT_FAILURE;
1459              return SVN_NO_ERROR;
1460            }
1461        }
1462      else
1463        {
1464          const char *first_arg = os->argv[os->ind++];
1465          subcommand = svn_opt_get_canonical_subcommand2(cmd_table, first_arg);
1466          if (subcommand == NULL)
1467            {
1468              const char* first_arg_utf8;
1469              SVN_ERR(svn_utf_cstring_to_utf8(&first_arg_utf8, first_arg,
1470                                              pool));
1471
1472              svn_error_clear(
1473                svn_cmdline_fprintf(stderr, pool,
1474                                    _("Unknown subcommand: '%s'\n"),
1475                                    first_arg_utf8));
1476              SVN_ERR(subcommand_help(NULL, NULL, pool));
1477              *exit_code = EXIT_FAILURE;
1478              return SVN_NO_ERROR;
1479            }
1480        }
1481    }
1482
1483  /* If there's a second argument, it's probably [one of] prefixes.
1484     Every subcommand except `help' requires at least one, so we parse
1485     them out here and store in opt_state. */
1486
1487  if (subcommand->cmd_func != subcommand_help)
1488    {
1489
1490      opt_state.prefixes = apr_array_make(pool, os->argc - os->ind,
1491                                          sizeof(const char *));
1492      for (i = os->ind ; i< os->argc; i++)
1493        {
1494          const char *prefix;
1495
1496          /* Ensure that each prefix is UTF8-encoded, in internal
1497             style, and absolute. */
1498          SVN_ERR(svn_utf_cstring_to_utf8(&prefix, os->argv[i], pool));
1499          prefix = svn_relpath__internal_style(prefix, pool);
1500          if (prefix[0] != '/')
1501            prefix = apr_pstrcat(pool, "/", prefix, SVN_VA_NULL);
1502          APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1503        }
1504
1505      if (opt_state.targets_file)
1506        {
1507          svn_stringbuf_t *buffer, *buffer_utf8;
1508          const char *utf8_targets_file;
1509          apr_array_header_t *targets = apr_array_make(pool, 0,
1510                                                       sizeof(const char *));
1511
1512          /* We need to convert to UTF-8 now, even before we divide
1513             the targets into an array, because otherwise we wouldn't
1514             know what delimiter to use for svn_cstring_split().  */
1515
1516          SVN_ERR(svn_utf_cstring_to_utf8(&utf8_targets_file,
1517                                          opt_state.targets_file, pool));
1518
1519          SVN_ERR(svn_stringbuf_from_file2(&buffer, utf8_targets_file,
1520                                           pool));
1521          SVN_ERR(svn_utf_stringbuf_to_utf8(&buffer_utf8, buffer, pool));
1522
1523          targets = apr_array_append(pool,
1524                         svn_cstring_split(buffer_utf8->data, "\n\r",
1525                                           TRUE, pool),
1526                         targets);
1527
1528          for (i = 0; i < targets->nelts; i++)
1529            {
1530              const char *prefix = APR_ARRAY_IDX(targets, i, const char *);
1531              if (prefix[0] != '/')
1532                prefix = apr_pstrcat(pool, "/", prefix, SVN_VA_NULL);
1533              APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1534            }
1535        }
1536
1537      if (apr_is_empty_array(opt_state.prefixes))
1538        {
1539          svn_error_clear(svn_cmdline_fprintf
1540                          (stderr, pool,
1541                           _("\nError: no prefixes supplied.\n")));
1542          *exit_code = EXIT_FAILURE;
1543          return SVN_NO_ERROR;
1544        }
1545    }
1546
1547
1548  /* Check that the subcommand wasn't passed any inappropriate options. */
1549  for (i = 0; i < received_opts->nelts; i++)
1550    {
1551      opt_id = APR_ARRAY_IDX(received_opts, i, int);
1552
1553      /* All commands implicitly accept --help, so just skip over this
1554         when we see it. Note that we don't want to include this option
1555         in their "accepted options" list because it would be awfully
1556         redundant to display it in every commands' help text. */
1557      if (opt_id == 'h' || opt_id == '?')
1558        continue;
1559
1560      if (! svn_opt_subcommand_takes_option3(subcommand, opt_id, NULL))
1561        {
1562          const char *optstr;
1563          const apr_getopt_option_t *badopt =
1564            svn_opt_get_option_from_code2(opt_id, options_table, subcommand,
1565                                          pool);
1566          svn_opt_format_option(&optstr, badopt, FALSE, pool);
1567          if (subcommand->name[0] == '-')
1568            SVN_ERR(subcommand_help(NULL, NULL, pool));
1569          else
1570            svn_error_clear(svn_cmdline_fprintf
1571                            (stderr, pool,
1572                             _("Subcommand '%s' doesn't accept option '%s'\n"
1573                               "Type 'svndumpfilter help %s' for usage.\n"),
1574                             subcommand->name, optstr, subcommand->name));
1575          *exit_code = EXIT_FAILURE;
1576          return SVN_NO_ERROR;
1577        }
1578    }
1579
1580  /* Run the subcommand. */
1581  err = (*subcommand->cmd_func)(os, &opt_state, pool);
1582  if (err)
1583    {
1584      /* For argument-related problems, suggest using the 'help'
1585         subcommand. */
1586      if (err->apr_err == SVN_ERR_CL_INSUFFICIENT_ARGS
1587          || err->apr_err == SVN_ERR_CL_ARG_PARSING_ERROR)
1588        {
1589          err = svn_error_quick_wrap(err,
1590                                     _("Try 'svndumpfilter help' for more "
1591                                       "info"));
1592        }
1593      return err;
1594    }
1595
1596  return SVN_NO_ERROR;
1597}
1598
1599int
1600main(int argc, const char *argv[])
1601{
1602  apr_pool_t *pool;
1603  int exit_code = EXIT_SUCCESS;
1604  svn_error_t *err;
1605
1606  /* Initialize the app. */
1607  if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
1608    return EXIT_FAILURE;
1609
1610  /* Create our top-level pool.  Use a separate mutexless allocator,
1611   * given this application is single threaded.
1612   */
1613  pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
1614
1615  err = sub_main(&exit_code, argc, argv, pool);
1616
1617  /* Flush stdout and report if it fails. It would be flushed on exit anyway
1618     but this makes sure that output is not silently lost if it fails. */
1619  err = svn_error_compose_create(err, svn_cmdline_fflush(stdout));
1620
1621  if (err)
1622    {
1623      exit_code = EXIT_FAILURE;
1624      svn_cmdline_handle_exit_error(err, NULL, "svndumpfilter: ");
1625    }
1626
1627  svn_pool_destroy(pool);
1628  return exit_code;
1629}
1630