1/*
2 * svndumpfilter.c: Subversion dump stream filtering tool main file.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <stdlib.h>
26
27#include <apr_file_io.h>
28
29#include "svn_private_config.h"
30#include "svn_cmdline.h"
31#include "svn_error.h"
32#include "svn_string.h"
33#include "svn_opt.h"
34#include "svn_utf.h"
35#include "svn_dirent_uri.h"
36#include "svn_path.h"
37#include "svn_hash.h"
38#include "svn_repos.h"
39#include "svn_fs.h"
40#include "svn_pools.h"
41#include "svn_sorts.h"
42#include "svn_props.h"
43#include "svn_mergeinfo.h"
44#include "svn_version.h"
45
46#include "private/svn_mergeinfo_private.h"
47#include "private/svn_cmdline_private.h"
48#include "private/svn_subr_private.h"
49
/* Pointer to a function that opens a standard I/O stream as an APR file
   in the given pool; this is the type create_stdio_stream() takes as its
   OPEN_FN argument.  On Win32 the pointer type carries the __stdcall
   calling convention. */
#ifdef _WIN32
typedef apr_status_t (__stdcall *open_fn_t)(apr_file_t **, apr_pool_t *);
#else
typedef apr_status_t (*open_fn_t)(apr_file_t **, apr_pool_t *);
#endif
55
56/*** Code. ***/
57
58/* Helper to open stdio streams */
59
60/* NOTE: we used to call svn_stream_from_stdio(), which wraps a stream
61   around a standard stdio.h FILE pointer.  The problem is that these
62   pointers operate through C Run Time (CRT) on Win32, which does all
63   sorts of translation on them: LF's become CRLF's, and ctrl-Z's
64   embedded in Word documents are interpreted as premature EOF's.
65
66   So instead, we use apr_file_open_std*, which bypass the CRT and
67   directly wrap the OS's file-handles, which don't know or care about
68   translation.  Thus dump/load works correctly on Win32.
69*/
70static svn_error_t *
71create_stdio_stream(svn_stream_t **stream,
72                    open_fn_t open_fn,
73                    apr_pool_t *pool)
74{
75  apr_file_t *stdio_file;
76  apr_status_t apr_err = open_fn(&stdio_file, pool);
77
78  if (apr_err)
79    return svn_error_wrap_apr(apr_err, _("Can't open stdio file"));
80
81  *stream = svn_stream_from_aprfile2(stdio_file, TRUE, pool);
82  return SVN_NO_ERROR;
83}
84
85
86/* Writes a property in dumpfile format to given stringbuf. */
87static void
88write_prop_to_stringbuf(svn_stringbuf_t *strbuf,
89                        const char *name,
90                        const svn_string_t *value)
91{
92  int bytes_used;
93  size_t namelen;
94  char buf[SVN_KEYLINE_MAXLEN];
95
96  /* Output name length, then name. */
97  namelen = strlen(name);
98  svn_stringbuf_appendbytes(strbuf, "K ", 2);
99
100  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
101  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
102  svn_stringbuf_appendbyte(strbuf, '\n');
103
104  svn_stringbuf_appendbytes(strbuf, name, namelen);
105  svn_stringbuf_appendbyte(strbuf, '\n');
106
107  /* Output value length, then value. */
108  svn_stringbuf_appendbytes(strbuf, "V ", 2);
109
110  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, value->len);
111  svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
112  svn_stringbuf_appendbyte(strbuf, '\n');
113
114  svn_stringbuf_appendbytes(strbuf, value->data, value->len);
115  svn_stringbuf_appendbyte(strbuf, '\n');
116}
117
118
119/* Writes a property deletion in dumpfile format to given stringbuf. */
120static void
121write_propdel_to_stringbuf(svn_stringbuf_t **strbuf,
122                           const char *name)
123{
124  int bytes_used;
125  size_t namelen;
126  char buf[SVN_KEYLINE_MAXLEN];
127
128  /* Output name length, then name. */
129  namelen = strlen(name);
130  svn_stringbuf_appendbytes(*strbuf, "D ", 2);
131
132  bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
133  svn_stringbuf_appendbytes(*strbuf, buf, bytes_used);
134  svn_stringbuf_appendbyte(*strbuf, '\n');
135
136  svn_stringbuf_appendbytes(*strbuf, name, namelen);
137  svn_stringbuf_appendbyte(*strbuf, '\n');
138}
139
140
141/* Compare the node-path PATH with the (const char *) prefixes in PFXLIST.
142 * Return TRUE if any prefix is a prefix of PATH (matching whole path
143 * components); FALSE otherwise.
144 * PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
145static svn_boolean_t
146ary_prefix_match(const apr_array_header_t *pfxlist, const char *path)
147{
148  int i;
149  size_t path_len = strlen(path);
150
151  for (i = 0; i < pfxlist->nelts; i++)
152    {
153      const char *pfx = APR_ARRAY_IDX(pfxlist, i, const char *);
154      size_t pfx_len = strlen(pfx);
155
156      if (path_len < pfx_len)
157        continue;
158      if (strncmp(path, pfx, pfx_len) == 0
159          && (pfx_len == 1 || path[pfx_len] == '\0' || path[pfx_len] == '/'))
160        return TRUE;
161    }
162
163  return FALSE;
164}
165
166
167/* Check whether we need to skip this PATH based on its presence in
168   the PREFIXES list, and the DO_EXCLUDE option.
169   PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
170static APR_INLINE svn_boolean_t
171skip_path(const char *path, const apr_array_header_t *prefixes,
172          svn_boolean_t do_exclude, svn_boolean_t glob)
173{
174  const svn_boolean_t matches =
175    (glob
176     ? svn_cstring_match_glob_list(path, prefixes)
177     : ary_prefix_match(prefixes, path));
178
179  /* NXOR */
180  return (matches ? do_exclude : !do_exclude);
181}
182
183
184
185/* Note: the input stream parser calls us with events.
186   Output of the filtered dump occurs for the most part streamily with the
187   event callbacks, to avoid caching large quantities of data in memory.
   The exceptions to this are:
189   - All revision data (headers and props) must be cached until a non-skipped
190     node within the revision is found, or the revision is closed.
191   - Node headers and props must be cached until all props have been received
192     (to allow the Prop-content-length to be found). This is signalled either
193     by the node text arriving, or the node being closed.
194   The writing_begun members of the associated object batons track the state.
195   output_revision() and output_node() are called to cause this flushing of
196   cached data to occur.
197*/
198
199
200/* Filtering batons */
201
/* One value in parse_baton_t.renumber_history, describing what became of
   a single original revision.  output_revision() creates these entries
   when revisions are being renumbered. */
struct revmap_t
{
  svn_revnum_t rev; /* For a kept revision, its number in the output; for
                       a dropped one, the last live revision at the time
                       it was dropped. */
  svn_boolean_t was_dropped; /* Was this revision dropped? */
};
207
/* State for one filtering run.  The parser callbacks receive it as their
   parse baton, and every revision baton points back to it. */
struct parse_baton_t
{
  /* Command-line options values. */

  /* If set, nodes whose path matches PREFIXES are skipped; if clear,
     nodes whose path does not match are skipped (see skip_path()). */
  svn_boolean_t do_exclude;
  /* Suppress the per-revision messages written to stderr. */
  svn_boolean_t quiet;
  /* Match paths against PREFIXES as glob patterns instead of as
     path-component prefixes. */
  svn_boolean_t glob;
  /* Do not write out a node-less revision if nodes were dropped from
     it; node-less revisions that dropped nothing are still written. */
  svn_boolean_t drop_empty_revs;
  /* When DROP_EMPTY_REVS is not set: do not write out any node-less
     revision, whether or not nodes were dropped from it.  Revision 0 is
     always written. */
  svn_boolean_t drop_all_empty_revs;
  /* Number each output revision as its original number minus the count
     of revisions dropped so far, and remap copyfrom and mergeinfo
     revisions through RENUMBER_HISTORY. */
  svn_boolean_t do_renumber_revs;
  /* Keep the original revision properties on a revision that is written
     out although all of its nodes were dropped, instead of replacing
     them with just the date and a stock log message. */
  svn_boolean_t preserve_revprops;
  /* Omit mergeinfo whose merge source path is filtered out (and ranges
     no newer than OLDEST_ORIGINAL_REV) rather than failing with an
     error. */
  svn_boolean_t skip_missing_merge_sources;
  /* Set by magic_header_record() when the dumpfile format version is at
     least SVN_REPOS_DUMPFILE_FORMAT_VERSION_DELTAS. */
  svn_boolean_t allow_deltas;
  /* The (const char *) paths, each starting with '/', that node paths
     are matched against. */
  apr_array_header_t *prefixes;

  /* Input and output streams. */
  svn_stream_t *in_stream;
  svn_stream_t *out_stream;

  /* State for the filtering process. */

  /* Number of revisions dropped so far. */
  apr_int32_t rev_drop_count;
  /* Set of the paths of all skipped nodes (path -> non-NULL marker). */
  apr_hash_t *dropped_nodes;
  /* Fate of each original revision; filled in by output_revision() only
     when DO_RENUMBER_REVS is set. */
  apr_hash_t *renumber_history;  /* svn_revnum_t -> struct revmap_t */
  /* Output number of the most recent revision written out; updated only
     when DO_RENUMBER_REVS is set. */
  svn_revnum_t last_live_revision;
  /* The oldest original revision, greater than r0, in the input
     stream which was not filtered. */
  svn_revnum_t oldest_original_rev;
};
235
/* Per-revision state, created by new_revision_record().  The revision's
   headers and properties are cached here until output_revision() decides
   whether to write the revision out. */
struct revision_baton_t
{
  /* Reference to the global parse baton. */
  struct parse_baton_t *pb;

  /* Does this revision have node or prop changes?
     HAS_NODES is set by new_node_record() once a node that is not
     skipped is seen; HAS_PROPS is set by set_revision_property(), and by
     output_revision() when it substitutes padding properties. */
  svn_boolean_t has_nodes;
  svn_boolean_t has_props;

  /* Did we drop any nodes? */
  svn_boolean_t had_dropped_nodes;

  /* Has output_revision() been run for this revision?  It sets this even
     when it ends up dropping the revision instead of writing it. */
  svn_boolean_t writing_begun;

  /* The original and new (re-mapped) revision numbers. */
  svn_revnum_t rev_orig;
  svn_revnum_t rev_actual;

  /* Pointers to dumpfile data: HEADER holds the header lines built so
     far; PROPS maps property names to (svn_string_t *) values. */
  svn_stringbuf_t *header;
  apr_hash_t *props;
};
259
/* Per-node state, created by new_node_record().  For a skipped node that
   function initializes only RB and DO_SKIP, so DO_SKIP must be checked
   before any other member is read. */
struct node_baton_t
{
  /* Reference to the current revision baton. */
  struct revision_baton_t *rb;

  /* Are we skipping this node? */
  svn_boolean_t do_skip;

  /* Have we been instructed to change or remove props on, or change
     the text of, this node?  output_node() emits the matching
     content-length headers only for the flags that are set. */
  svn_boolean_t has_props;
  svn_boolean_t has_text;

  /* Has output_node() been run for this node? */
  svn_boolean_t writing_begun;

  /* The text content length according to the dumpfile headers, because we
     need the length before we have the actual text. */
  svn_filesize_t tcl;

  /* Pointers to dumpfile data: headers and rasterized properties that
     are cached until output_node() writes them. */
  svn_stringbuf_t *header;
  svn_stringbuf_t *props;

  /* Expect deltas?  Set by new_node_record() from the node's Prop-delta
     and Text-delta headers when their value is "true". */
  svn_boolean_t has_prop_delta;
  svn_boolean_t has_text_delta;

  /* We might need the node path in a parse error message. */
  char *node_path;
};
291
292
293
294/* Filtering vtable members */
295
296/* File-format stamp. */
297static svn_error_t *
298magic_header_record(int version, void *parse_baton, apr_pool_t *pool)
299{
300  struct parse_baton_t *pb = parse_baton;
301
302  if (version >= SVN_REPOS_DUMPFILE_FORMAT_VERSION_DELTAS)
303    pb->allow_deltas = TRUE;
304
305  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
306                            SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n",
307                            version));
308
309  return SVN_NO_ERROR;
310}
311
312
313/* New revision: set up revision_baton, decide if we skip it. */
314static svn_error_t *
315new_revision_record(void **revision_baton,
316                    apr_hash_t *headers,
317                    void *parse_baton,
318                    apr_pool_t *pool)
319{
320  struct revision_baton_t *rb;
321  apr_hash_index_t *hi;
322  const char *rev_orig;
323  svn_stream_t *header_stream;
324
325  *revision_baton = apr_palloc(pool, sizeof(struct revision_baton_t));
326  rb = *revision_baton;
327  rb->pb = parse_baton;
328  rb->has_nodes = FALSE;
329  rb->has_props = FALSE;
330  rb->had_dropped_nodes = FALSE;
331  rb->writing_begun = FALSE;
332  rb->header = svn_stringbuf_create_empty(pool);
333  rb->props = apr_hash_make(pool);
334
335  header_stream = svn_stream_from_stringbuf(rb->header, pool);
336
337  rev_orig = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER);
338  rb->rev_orig = SVN_STR_TO_REV(rev_orig);
339
340  if (rb->pb->do_renumber_revs)
341    rb->rev_actual = rb->rev_orig - rb->pb->rev_drop_count;
342  else
343    rb->rev_actual = rb->rev_orig;
344
345  SVN_ERR(svn_stream_printf(header_stream, pool,
346                            SVN_REPOS_DUMPFILE_REVISION_NUMBER ": %ld\n",
347                            rb->rev_actual));
348
349  for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
350    {
351      const char *key = svn__apr_hash_index_key(hi);
352      const char *val = svn__apr_hash_index_val(hi);
353
354      if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
355          || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
356          || (!strcmp(key, SVN_REPOS_DUMPFILE_REVISION_NUMBER)))
357        continue;
358
359      /* passthru: put header into header stringbuf. */
360
361      SVN_ERR(svn_stream_printf(header_stream, pool, "%s: %s\n",
362                                key, val));
363    }
364
365  SVN_ERR(svn_stream_close(header_stream));
366
367  return SVN_NO_ERROR;
368}
369
370
371/* Output revision to dumpstream
372   This may be called by new_node_record(), iff rb->has_nodes has been set
373   to TRUE, or by close_revision() otherwise. This must only be called
374   if rb->writing_begun is FALSE. */
375static svn_error_t *
376output_revision(struct revision_baton_t *rb)
377{
378  int bytes_used;
379  char buf[SVN_KEYLINE_MAXLEN];
380  apr_hash_index_t *hi;
381  svn_boolean_t write_out_rev = FALSE;
382  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
383  svn_stringbuf_t *props = svn_stringbuf_create_empty(hash_pool);
384  apr_pool_t *subpool = svn_pool_create(hash_pool);
385
386  rb->writing_begun = TRUE;
387
388  /* If this revision has no nodes left because the ones it had were
389     dropped, and we are not dropping empty revisions, and we were not
390     told to preserve revision props, then we want to fixup the
391     revision props to only contain:
392       - the date
393       - a log message that reports that this revision is just stuffing. */
394  if ((! rb->pb->preserve_revprops)
395      && (! rb->has_nodes)
396      && rb->had_dropped_nodes
397      && (! rb->pb->drop_empty_revs)
398      && (! rb->pb->drop_all_empty_revs))
399    {
400      apr_hash_t *old_props = rb->props;
401      rb->has_props = TRUE;
402      rb->props = apr_hash_make(hash_pool);
403      svn_hash_sets(rb->props, SVN_PROP_REVISION_DATE,
404                    svn_hash_gets(old_props, SVN_PROP_REVISION_DATE));
405      svn_hash_sets(rb->props, SVN_PROP_REVISION_LOG,
406                    svn_string_create(_("This is an empty revision for "
407                                        "padding."), hash_pool));
408    }
409
410  /* Now, "rasterize" the props to a string, and append the property
411     information to the header string.  */
412  if (rb->has_props)
413    {
414      for (hi = apr_hash_first(subpool, rb->props);
415           hi;
416           hi = apr_hash_next(hi))
417        {
418          const char *pname = svn__apr_hash_index_key(hi);
419          const svn_string_t *pval = svn__apr_hash_index_val(hi);
420
421          write_prop_to_stringbuf(props, pname, pval);
422        }
423      svn_stringbuf_appendcstr(props, "PROPS-END\n");
424      svn_stringbuf_appendcstr(rb->header,
425                               SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
426      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT,
427                                props->len);
428      svn_stringbuf_appendbytes(rb->header, buf, bytes_used);
429      svn_stringbuf_appendbyte(rb->header, '\n');
430    }
431
432  svn_stringbuf_appendcstr(rb->header, SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
433  bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT, props->len);
434  svn_stringbuf_appendbytes(rb->header, buf, bytes_used);
435  svn_stringbuf_appendbyte(rb->header, '\n');
436
437  /* put an end to headers */
438  svn_stringbuf_appendbyte(rb->header, '\n');
439
440  /* put an end to revision */
441  svn_stringbuf_appendbyte(props, '\n');
442
443  /* write out the revision */
444  /* Revision is written out in the following cases:
445     1. If the revision has nodes or
446     it is revision 0 (Special case: To preserve the props on r0).
447     2. --drop-empty-revs has been supplied,
448     but revision has not all nodes dropped.
449     3. If no --drop-empty-revs or --drop-all-empty-revs have been supplied,
450     write out the revision which has no nodes to begin with.
451  */
452  if (rb->has_nodes || (rb->rev_orig == 0))
453    write_out_rev = TRUE;
454  else if (rb->pb->drop_empty_revs)
455    write_out_rev = ! rb->had_dropped_nodes;
456  else if (! rb->pb->drop_all_empty_revs)
457    write_out_rev = TRUE;
458
459  if (write_out_rev)
460    {
461      /* This revision is a keeper. */
462      SVN_ERR(svn_stream_write(rb->pb->out_stream,
463                               rb->header->data, &(rb->header->len)));
464      SVN_ERR(svn_stream_write(rb->pb->out_stream,
465                               props->data, &(props->len)));
466
467      /* Stash the oldest original rev not dropped. */
468      if (rb->rev_orig > 0
469          && !SVN_IS_VALID_REVNUM(rb->pb->oldest_original_rev))
470        rb->pb->oldest_original_rev = rb->rev_orig;
471
472      if (rb->pb->do_renumber_revs)
473        {
474          svn_revnum_t *rr_key;
475          struct revmap_t *rr_val;
476          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
477          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
478          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
479          *rr_key = rb->rev_orig;
480          rr_val->rev = rb->rev_actual;
481          rr_val->was_dropped = FALSE;
482          apr_hash_set(rb->pb->renumber_history, rr_key,
483                       sizeof(*rr_key), rr_val);
484          rb->pb->last_live_revision = rb->rev_actual;
485        }
486
487      if (! rb->pb->quiet)
488        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
489                                    _("Revision %ld committed as %ld.\n"),
490                                    rb->rev_orig, rb->rev_actual));
491    }
492  else
493    {
494      /* We're dropping this revision. */
495      rb->pb->rev_drop_count++;
496      if (rb->pb->do_renumber_revs)
497        {
498          svn_revnum_t *rr_key;
499          struct revmap_t *rr_val;
500          apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
501          rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
502          rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
503          *rr_key = rb->rev_orig;
504          rr_val->rev = rb->pb->last_live_revision;
505          rr_val->was_dropped = TRUE;
506          apr_hash_set(rb->pb->renumber_history, rr_key,
507                       sizeof(*rr_key), rr_val);
508        }
509
510      if (! rb->pb->quiet)
511        SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
512                                    _("Revision %ld skipped.\n"),
513                                    rb->rev_orig));
514    }
515  svn_pool_destroy(subpool);
516  return SVN_NO_ERROR;
517}
518
519
520/* UUID record here: dump it, as we do not filter them. */
521static svn_error_t *
522uuid_record(const char *uuid, void *parse_baton, apr_pool_t *pool)
523{
524  struct parse_baton_t *pb = parse_baton;
525  SVN_ERR(svn_stream_printf(pb->out_stream, pool,
526                            SVN_REPOS_DUMPFILE_UUID ": %s\n\n", uuid));
527  return SVN_NO_ERROR;
528}
529
530
531/* New node here. Set up node_baton by copying headers. */
532static svn_error_t *
533new_node_record(void **node_baton,
534                apr_hash_t *headers,
535                void *rev_baton,
536                apr_pool_t *pool)
537{
538  struct parse_baton_t *pb;
539  struct node_baton_t *nb;
540  char *node_path, *copyfrom_path;
541  apr_hash_index_t *hi;
542  const char *tcl;
543
544  *node_baton = apr_palloc(pool, sizeof(struct node_baton_t));
545  nb          = *node_baton;
546  nb->rb      = rev_baton;
547  pb          = nb->rb->pb;
548
549  node_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH);
550  copyfrom_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH);
551
552  /* Ensure that paths start with a leading '/'. */
553  if (node_path[0] != '/')
554    node_path = apr_pstrcat(pool, "/", node_path, (char *)NULL);
555  if (copyfrom_path && copyfrom_path[0] != '/')
556    copyfrom_path = apr_pstrcat(pool, "/", copyfrom_path, (char *)NULL);
557
558  nb->do_skip = skip_path(node_path, pb->prefixes,
559                          pb->do_exclude, pb->glob);
560
561  /* If we're skipping the node, take note of path, discarding the
562     rest.  */
563  if (nb->do_skip)
564    {
565      svn_hash_sets(pb->dropped_nodes,
566                    apr_pstrdup(apr_hash_pool_get(pb->dropped_nodes),
567                                node_path),
568                    (void *)1);
569      nb->rb->had_dropped_nodes = TRUE;
570    }
571  else
572    {
573      const char *kind;
574      const char *action;
575
576      tcl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
577
578      /* Test if this node was copied from dropped source. */
579      if (copyfrom_path &&
580          skip_path(copyfrom_path, pb->prefixes, pb->do_exclude, pb->glob))
581        {
582          /* This node was copied from a dropped source.
583             We have a problem, since we did not want to drop this node too.
584
585             However, there is one special case we'll handle.  If the node is
586             a file, and this was a copy-and-modify operation, then the
587             dumpfile should contain the new contents of the file.  In this
588             scenario, we'll just do an add without history using the new
589             contents.  */
590          kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
591
592          /* If there is a Text-content-length header, and the kind is
593             "file", we just fallback to an add without history. */
594          if (tcl && (strcmp(kind, "file") == 0))
595            {
596              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH,
597                            NULL);
598              svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
599                            NULL);
600              copyfrom_path = NULL;
601            }
602          /* Else, this is either a directory or a file whose contents we
603             don't have readily available.  */
604          else
605            {
606              return svn_error_createf
607                (SVN_ERR_INCOMPLETE_DATA, 0,
608                 _("Invalid copy source path '%s'"), copyfrom_path);
609            }
610        }
611
612      nb->has_props = FALSE;
613      nb->has_text = FALSE;
614      nb->has_prop_delta = FALSE;
615      nb->has_text_delta = FALSE;
616      nb->writing_begun = FALSE;
617      nb->tcl = tcl ? svn__atoui64(tcl) : 0;
618      nb->header = svn_stringbuf_create_empty(pool);
619      nb->props = svn_stringbuf_create_empty(pool);
620      nb->node_path = apr_pstrdup(pool, node_path);
621
622      /* Now we know for sure that we have a node that will not be
623         skipped, flush the revision if it has not already been done. */
624      nb->rb->has_nodes = TRUE;
625      if (! nb->rb->writing_begun)
626        SVN_ERR(output_revision(nb->rb));
627
628      /* A node record is required to begin with 'Node-path', skip the
629         leading '/' to match the form used by 'svnadmin dump'. */
630      SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
631                                pool, "%s: %s\n",
632                                SVN_REPOS_DUMPFILE_NODE_PATH, node_path + 1));
633
634      /* Node-kind is next and is optional. */
635      kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
636      if (kind)
637        SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
638                                  pool, "%s: %s\n",
639                                  SVN_REPOS_DUMPFILE_NODE_KIND, kind));
640
641      /* Node-action is next and required. */
642      action = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_ACTION);
643      if (action)
644        SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
645                                  pool, "%s: %s\n",
646                                  SVN_REPOS_DUMPFILE_NODE_ACTION, action));
647      else
648        return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
649                                 _("Missing Node-action for path '%s'"),
650                                 node_path);
651
652      for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
653        {
654          const char *key = svn__apr_hash_index_key(hi);
655          const char *val = svn__apr_hash_index_val(hi);
656
657          if ((!strcmp(key, SVN_REPOS_DUMPFILE_PROP_DELTA))
658              && (!strcmp(val, "true")))
659            nb->has_prop_delta = TRUE;
660
661          if ((!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_DELTA))
662              && (!strcmp(val, "true")))
663            nb->has_text_delta = TRUE;
664
665          if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
666              || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
667              || (!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH))
668              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_PATH))
669              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_KIND))
670              || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_ACTION)))
671            continue;
672
673          /* Rewrite Node-Copyfrom-Rev if we are renumbering revisions.
674             The number points to some revision in the past. We keep track
675             of revision renumbering in an apr_hash, which maps original
676             revisions to new ones. Dropped revision are mapped to -1.
677             This should never happen here.
678          */
679          if (pb->do_renumber_revs
680              && (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV)))
681            {
682              svn_revnum_t cf_orig_rev;
683              struct revmap_t *cf_renum_val;
684
685              cf_orig_rev = SVN_STR_TO_REV(val);
686              cf_renum_val = apr_hash_get(pb->renumber_history,
687                                          &cf_orig_rev,
688                                          sizeof(svn_revnum_t));
689              if (! (cf_renum_val && SVN_IS_VALID_REVNUM(cf_renum_val->rev)))
690                return svn_error_createf
691                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
692                   _("No valid copyfrom revision in filtered stream"));
693              SVN_ERR(svn_stream_printf
694                      (nb->rb->pb->out_stream, pool,
695                       SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV ": %ld\n",
696                       cf_renum_val->rev));
697              continue;
698            }
699
700          /* passthru: put header straight to output */
701
702          SVN_ERR(svn_stream_printf(nb->rb->pb->out_stream,
703                                    pool, "%s: %s\n",
704                                    key, val));
705        }
706    }
707
708  return SVN_NO_ERROR;
709}
710
711
712/* Output node header and props to dumpstream
713   This will be called by set_fulltext() after setting nb->has_text to TRUE,
714   if the node has any text, or by close_node() otherwise. This must only
715   be called if nb->writing_begun is FALSE. */
716static svn_error_t *
717output_node(struct node_baton_t *nb)
718{
719  int bytes_used;
720  char buf[SVN_KEYLINE_MAXLEN];
721
722  nb->writing_begun = TRUE;
723
724  /* when there are no props nb->props->len would be zero and won't mess up
725     Content-Length. */
726  if (nb->has_props)
727    svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
728
729  /* 1. recalculate & check text-md5 if present. Passed through right now. */
730
731  /* 2. recalculate and add content-lengths */
732
733  if (nb->has_props)
734    {
735      svn_stringbuf_appendcstr(nb->header,
736                               SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
737      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" APR_SIZE_T_FMT,
738                                nb->props->len);
739      svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
740      svn_stringbuf_appendbyte(nb->header, '\n');
741    }
742  if (nb->has_text)
743    {
744      svn_stringbuf_appendcstr(nb->header,
745                               SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
746      bytes_used = apr_snprintf(buf, sizeof(buf), ": %" SVN_FILESIZE_T_FMT,
747                                nb->tcl);
748      svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
749      svn_stringbuf_appendbyte(nb->header, '\n');
750    }
751  svn_stringbuf_appendcstr(nb->header, SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
752  bytes_used = apr_snprintf(buf, sizeof(buf), ": %" SVN_FILESIZE_T_FMT,
753                            (svn_filesize_t) (nb->props->len + nb->tcl));
754  svn_stringbuf_appendbytes(nb->header, buf, bytes_used);
755  svn_stringbuf_appendbyte(nb->header, '\n');
756
757  /* put an end to headers */
758  svn_stringbuf_appendbyte(nb->header, '\n');
759
760  /* 3. output all the stuff */
761
762  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream,
763                           nb->header->data , &(nb->header->len)));
764  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream,
765                           nb->props->data , &(nb->props->len)));
766
767  return SVN_NO_ERROR;
768}
769
770
771/* Examine the mergeinfo in INITIAL_VAL, omitting missing merge
772   sources or renumbering revisions in rangelists as appropriate, and
773   return the (possibly new) mergeinfo in *FINAL_VAL (allocated from
774   POOL). */
775static svn_error_t *
776adjust_mergeinfo(svn_string_t **final_val, const svn_string_t *initial_val,
777                 struct revision_baton_t *rb, apr_pool_t *pool)
778{
779  apr_hash_t *mergeinfo;
780  apr_hash_t *final_mergeinfo = apr_hash_make(pool);
781  apr_hash_index_t *hi;
782  apr_pool_t *subpool = svn_pool_create(pool);
783
784  SVN_ERR(svn_mergeinfo_parse(&mergeinfo, initial_val->data, subpool));
785
786  /* Issue #3020: If we are skipping missing merge sources, then also
787     filter mergeinfo ranges as old or older than the oldest revision in the
788     dump stream.  Those older than the oldest obviously refer to history
789     outside of the dump stream.  The oldest rev itself is present in the
790     dump, but cannot be a valid merge source revision since it is the
791     start of all history.  E.g. if we dump -r100:400 then dumpfilter the
792     result with --skip-missing-merge-sources, any mergeinfo with revision
793     100 implies a change of -r99:100, but r99 is part of the history we
794     want filtered.  This is analogous to how r1 is always meaningless as
795     a merge source revision.
796
797     If the oldest rev is r0 then there is nothing to filter. */
798  if (rb->pb->skip_missing_merge_sources && rb->pb->oldest_original_rev > 0)
799    SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
800      &mergeinfo, mergeinfo,
801      rb->pb->oldest_original_rev, 0,
802      FALSE, subpool, subpool));
803
804  for (hi = apr_hash_first(subpool, mergeinfo); hi; hi = apr_hash_next(hi))
805    {
806      const char *merge_source = svn__apr_hash_index_key(hi);
807      svn_rangelist_t *rangelist = svn__apr_hash_index_val(hi);
808      struct parse_baton_t *pb = rb->pb;
809
810      /* Determine whether the merge_source is a part of the prefix. */
811      if (skip_path(merge_source, pb->prefixes, pb->do_exclude, pb->glob))
812        {
813          if (pb->skip_missing_merge_sources)
814            continue;
815          else
816            return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
817                                     _("Missing merge source path '%s'; try "
818                                       "with --skip-missing-merge-sources"),
819                                     merge_source);
820        }
821
822      /* Possibly renumber revisions in merge source's rangelist. */
823      if (pb->do_renumber_revs)
824        {
825          int i;
826
827          for (i = 0; i < rangelist->nelts; i++)
828            {
829              struct revmap_t *revmap_start;
830              struct revmap_t *revmap_end;
831              svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
832                                                       svn_merge_range_t *);
833
834              revmap_start = apr_hash_get(pb->renumber_history,
835                                          &range->start, sizeof(svn_revnum_t));
836              if (! (revmap_start && SVN_IS_VALID_REVNUM(revmap_start->rev)))
837                return svn_error_createf
838                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
839                   _("No valid revision range 'start' in filtered stream"));
840
841              revmap_end = apr_hash_get(pb->renumber_history,
842                                        &range->end, sizeof(svn_revnum_t));
843              if (! (revmap_end && SVN_IS_VALID_REVNUM(revmap_end->rev)))
844                return svn_error_createf
845                  (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
846                   _("No valid revision range 'end' in filtered stream"));
847
848              range->start = revmap_start->rev;
849              range->end = revmap_end->rev;
850            }
851        }
852      svn_hash_sets(final_mergeinfo, merge_source, rangelist);
853    }
854
855  SVN_ERR(svn_mergeinfo_sort(final_mergeinfo, subpool));
856  SVN_ERR(svn_mergeinfo_to_string(final_val, final_mergeinfo, pool));
857  svn_pool_destroy(subpool);
858
859  return SVN_NO_ERROR;
860}
861
862
863static svn_error_t *
864set_revision_property(void *revision_baton,
865                      const char *name,
866                      const svn_string_t *value)
867{
868  struct revision_baton_t *rb = revision_baton;
869  apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
870
871  rb->has_props = TRUE;
872  svn_hash_sets(rb->props,
873                apr_pstrdup(hash_pool, name),
874                svn_string_dup(value, hash_pool));
875  return SVN_NO_ERROR;
876}
877
878
879static svn_error_t *
880set_node_property(void *node_baton,
881                  const char *name,
882                  const svn_string_t *value)
883{
884  struct node_baton_t *nb = node_baton;
885  struct revision_baton_t *rb = nb->rb;
886
887  if (nb->do_skip)
888    return SVN_NO_ERROR;
889
890  if (! (nb->has_props || nb->has_prop_delta))
891    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
892                             _("Delta property block detected, but deltas "
893                               "are not enabled for node '%s' in original "
894                               "revision %ld"),
895                             nb->node_path, rb->rev_orig);
896
897  if (strcmp(name, SVN_PROP_MERGEINFO) == 0)
898    {
899      svn_string_t *filtered_mergeinfo;  /* Avoid compiler warning. */
900      apr_pool_t *pool = apr_hash_pool_get(rb->props);
901      SVN_ERR(adjust_mergeinfo(&filtered_mergeinfo, value, rb, pool));
902      value = filtered_mergeinfo;
903    }
904
905  nb->has_props = TRUE;
906  write_prop_to_stringbuf(nb->props, name, value);
907
908  return SVN_NO_ERROR;
909}
910
911
912static svn_error_t *
913delete_node_property(void *node_baton, const char *name)
914{
915  struct node_baton_t *nb = node_baton;
916  struct revision_baton_t *rb = nb->rb;
917
918  if (nb->do_skip)
919    return SVN_NO_ERROR;
920
921  if (!nb->has_prop_delta)
922    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
923                             _("Delta property block detected, but deltas "
924                               "are not enabled for node '%s' in original "
925                               "revision %ld"),
926                             nb->node_path, rb->rev_orig);
927
928  nb->has_props = TRUE;
929  write_propdel_to_stringbuf(&(nb->props), name);
930
931  return SVN_NO_ERROR;
932}
933
934
935static svn_error_t *
936remove_node_props(void *node_baton)
937{
938  struct node_baton_t *nb = node_baton;
939
940  /* In this case, not actually indicating that the node *has* props,
941     rather that we know about all the props that it has, since it now
942     has none. */
943  nb->has_props = TRUE;
944
945  return SVN_NO_ERROR;
946}
947
948
949static svn_error_t *
950set_fulltext(svn_stream_t **stream, void *node_baton)
951{
952  struct node_baton_t *nb = node_baton;
953
954  if (!nb->do_skip)
955    {
956      nb->has_text = TRUE;
957      if (! nb->writing_begun)
958        SVN_ERR(output_node(nb));
959      *stream = nb->rb->pb->out_stream;
960    }
961
962  return SVN_NO_ERROR;
963}
964
965
966/* Finalize node */
967static svn_error_t *
968close_node(void *node_baton)
969{
970  struct node_baton_t *nb = node_baton;
971  apr_size_t len = 2;
972
973  /* Get out of here if we can. */
974  if (nb->do_skip)
975    return SVN_NO_ERROR;
976
977  /* If the node was not flushed already to output its text, do it now. */
978  if (! nb->writing_begun)
979    SVN_ERR(output_node(nb));
980
981  /* put an end to node. */
982  SVN_ERR(svn_stream_write(nb->rb->pb->out_stream, "\n\n", &len));
983
984  return SVN_NO_ERROR;
985}
986
987
988/* Finalize revision */
989static svn_error_t *
990close_revision(void *revision_baton)
991{
992  struct revision_baton_t *rb = revision_baton;
993
994  /* If no node has yet flushed the revision, do it now. */
995  if (! rb->writing_begun)
996    return output_revision(rb);
997  else
998    return SVN_NO_ERROR;
999}
1000
1001
1002/* Filtering vtable */
1003svn_repos_parse_fns3_t filtering_vtable =
1004  {
1005    magic_header_record,
1006    uuid_record,
1007    new_revision_record,
1008    new_node_record,
1009    set_revision_property,
1010    set_node_property,
1011    delete_node_property,
1012    remove_node_props,
1013    set_fulltext,
1014    NULL,
1015    close_node,
1016    close_revision
1017  };
1018
1019
1020
1021/** Subcommands. **/
1022
1023static svn_opt_subcommand_t
1024  subcommand_help,
1025  subcommand_exclude,
1026  subcommand_include;
1027
1028enum
1029  {
1030    svndumpfilter__drop_empty_revs = SVN_OPT_FIRST_LONGOPT_ID,
1031    svndumpfilter__drop_all_empty_revs,
1032    svndumpfilter__renumber_revs,
1033    svndumpfilter__preserve_revprops,
1034    svndumpfilter__skip_missing_merge_sources,
1035    svndumpfilter__targets,
1036    svndumpfilter__quiet,
1037    svndumpfilter__glob,
1038    svndumpfilter__version
1039  };
1040
1041/* Option codes and descriptions.
1042 *
1043 * The entire list must be terminated with an entry of nulls.
1044 */
1045static const apr_getopt_option_t options_table[] =
1046  {
1047    {"help",          'h', 0,
1048     N_("show help on a subcommand")},
1049
1050    {NULL,            '?', 0,
1051     N_("show help on a subcommand")},
1052
1053    {"version",            svndumpfilter__version, 0,
1054     N_("show program version information") },
1055    {"quiet",              svndumpfilter__quiet, 0,
1056     N_("Do not display filtering statistics.") },
1057    {"pattern",            svndumpfilter__glob, 0,
1058     N_("Treat the path prefixes as file glob patterns.") },
1059    {"drop-empty-revs",    svndumpfilter__drop_empty_revs, 0,
1060     N_("Remove revisions emptied by filtering.")},
1061    {"drop-all-empty-revs",    svndumpfilter__drop_all_empty_revs, 0,
1062     N_("Remove all empty revisions found in dumpstream\n"
1063        "                             except revision 0.")},
1064    {"renumber-revs",      svndumpfilter__renumber_revs, 0,
1065     N_("Renumber revisions left after filtering.") },
1066    {"skip-missing-merge-sources",
1067     svndumpfilter__skip_missing_merge_sources, 0,
1068     N_("Skip missing merge sources.") },
1069    {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
1070     N_("Don't filter revision properties.") },
1071    {"targets", svndumpfilter__targets, 1,
1072     N_("Read additional prefixes, one per line, from\n"
1073        "                             file ARG.")},
1074    {NULL}
1075  };
1076
1077
1078/* Array of available subcommands.
1079 * The entire list must be terminated with an entry of nulls.
1080 */
1081static const svn_opt_subcommand_desc2_t cmd_table[] =
1082  {
1083    {"exclude", subcommand_exclude, {0},
1084     N_("Filter out nodes with given prefixes from dumpstream.\n"
1085        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
1086     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
1087      svndumpfilter__renumber_revs,
1088      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
1089      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
1090      svndumpfilter__glob} },
1091
1092    {"include", subcommand_include, {0},
1093     N_("Filter out nodes without given prefixes from dumpstream.\n"
1094        "usage: svndumpfilter include PATH_PREFIX...\n"),
1095     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
1096      svndumpfilter__renumber_revs,
1097      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
1098      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
1099      svndumpfilter__glob} },
1100
1101    {"help", subcommand_help, {"?", "h"},
1102     N_("Describe the usage of this program or its subcommands.\n"
1103        "usage: svndumpfilter help [SUBCOMMAND...]\n"),
1104     {0} },
1105
1106    { NULL, NULL, {0}, NULL, {0} }
1107  };
1108
1109
1110/* Baton for passing option/argument state to a subcommand function. */
1111struct svndumpfilter_opt_state
1112{
1113  svn_opt_revision_t start_revision;     /* -r X[:Y] is         */
1114  svn_opt_revision_t end_revision;       /* not implemented.    */
1115  svn_boolean_t quiet;                   /* --quiet             */
1116  svn_boolean_t glob;                    /* --pattern           */
1117  svn_boolean_t version;                 /* --version           */
1118  svn_boolean_t drop_empty_revs;         /* --drop-empty-revs   */
1119  svn_boolean_t drop_all_empty_revs;     /* --drop-all-empty-revs */
1120  svn_boolean_t help;                    /* --help or -?        */
1121  svn_boolean_t renumber_revs;           /* --renumber-revs     */
1122  svn_boolean_t preserve_revprops;       /* --preserve-revprops */
1123  svn_boolean_t skip_missing_merge_sources;
1124                                         /* --skip-missing-merge-sources */
1125  const char *targets_file;              /* --targets-file       */
1126  apr_array_header_t *prefixes;          /* mainargs.           */
1127};
1128
1129
1130static svn_error_t *
1131parse_baton_initialize(struct parse_baton_t **pb,
1132                       struct svndumpfilter_opt_state *opt_state,
1133                       svn_boolean_t do_exclude,
1134                       apr_pool_t *pool)
1135{
1136  struct parse_baton_t *baton = apr_palloc(pool, sizeof(*baton));
1137
1138  /* Read the stream from STDIN.  Users can redirect a file. */
1139  SVN_ERR(create_stdio_stream(&(baton->in_stream),
1140                              apr_file_open_stdin, pool));
1141
1142  /* Have the parser dump results to STDOUT. Users can redirect a file. */
1143  SVN_ERR(create_stdio_stream(&(baton->out_stream),
1144                              apr_file_open_stdout, pool));
1145
1146  baton->do_exclude = do_exclude;
1147
1148  /* Ignore --renumber-revs if there can't possibly be
1149     anything to renumber. */
1150  baton->do_renumber_revs =
1151    (opt_state->renumber_revs && (opt_state->drop_empty_revs
1152                                  || opt_state->drop_all_empty_revs));
1153
1154  baton->drop_empty_revs = opt_state->drop_empty_revs;
1155  baton->drop_all_empty_revs = opt_state->drop_all_empty_revs;
1156  baton->preserve_revprops = opt_state->preserve_revprops;
1157  baton->quiet = opt_state->quiet;
1158  baton->glob = opt_state->glob;
1159  baton->prefixes = opt_state->prefixes;
1160  baton->skip_missing_merge_sources = opt_state->skip_missing_merge_sources;
1161  baton->rev_drop_count = 0; /* used to shift revnums while filtering */
1162  baton->dropped_nodes = apr_hash_make(pool);
1163  baton->renumber_history = apr_hash_make(pool);
1164  baton->last_live_revision = SVN_INVALID_REVNUM;
1165  baton->oldest_original_rev = SVN_INVALID_REVNUM;
1166  baton->allow_deltas = FALSE;
1167
1168  *pb = baton;
1169  return SVN_NO_ERROR;
1170}
1171
1172/* This implements `help` subcommand. */
1173static svn_error_t *
1174subcommand_help(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1175{
1176  struct svndumpfilter_opt_state *opt_state = baton;
1177  const char *header =
1178    _("general usage: svndumpfilter SUBCOMMAND [ARGS & OPTIONS ...]\n"
1179      "Type 'svndumpfilter help <subcommand>' for help on a "
1180      "specific subcommand.\n"
1181      "Type 'svndumpfilter --version' to see the program version.\n"
1182      "\n"
1183      "Available subcommands:\n");
1184
1185  SVN_ERR(svn_opt_print_help4(os, "svndumpfilter",
1186                              opt_state ? opt_state->version : FALSE,
1187                              opt_state ? opt_state->quiet : FALSE,
1188                              /*###opt_state ? opt_state->verbose :*/ FALSE,
1189                              NULL, header, cmd_table, options_table,
1190                              NULL, NULL, pool));
1191
1192  return SVN_NO_ERROR;
1193}
1194
1195
1196/* Version compatibility check */
1197static svn_error_t *
1198check_lib_versions(void)
1199{
1200  static const svn_version_checklist_t checklist[] =
1201    {
1202      { "svn_subr",  svn_subr_version },
1203      { "svn_repos", svn_repos_version },
1204      { "svn_delta", svn_delta_version },
1205      { NULL, NULL }
1206    };
1207  SVN_VERSION_DEFINE(my_version);
1208
1209  return svn_ver_check_list2(&my_version, checklist, svn_ver_equal);
1210}
1211
1212
1213/* Do the real work of filtering. */
1214static svn_error_t *
1215do_filter(apr_getopt_t *os,
1216          void *baton,
1217          svn_boolean_t do_exclude,
1218          apr_pool_t *pool)
1219{
1220  struct svndumpfilter_opt_state *opt_state = baton;
1221  struct parse_baton_t *pb;
1222  apr_hash_index_t *hi;
1223  apr_array_header_t *keys;
1224  int i, num_keys;
1225
1226  if (! opt_state->quiet)
1227    {
1228      apr_pool_t *subpool = svn_pool_create(pool);
1229
1230      if (opt_state->glob)
1231        {
1232          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1233                                      do_exclude
1234                                      ? (opt_state->drop_empty_revs
1235                                         || opt_state->drop_all_empty_revs)
1236                                        ? _("Excluding (and dropping empty "
1237                                            "revisions for) prefix patterns:\n")
1238                                        : _("Excluding prefix patterns:\n")
1239                                      : (opt_state->drop_empty_revs
1240                                         || opt_state->drop_all_empty_revs)
1241                                        ? _("Including (and dropping empty "
1242                                            "revisions for) prefix patterns:\n")
1243                                        : _("Including prefix patterns:\n")));
1244        }
1245      else
1246        {
1247          SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1248                                      do_exclude
1249                                      ? (opt_state->drop_empty_revs
1250                                         || opt_state->drop_all_empty_revs)
1251                                        ? _("Excluding (and dropping empty "
1252                                            "revisions for) prefixes:\n")
1253                                        : _("Excluding prefixes:\n")
1254                                      : (opt_state->drop_empty_revs
1255                                         || opt_state->drop_all_empty_revs)
1256                                        ? _("Including (and dropping empty "
1257                                            "revisions for) prefixes:\n")
1258                                        : _("Including prefixes:\n")));
1259        }
1260
1261      for (i = 0; i < opt_state->prefixes->nelts; i++)
1262        {
1263          svn_pool_clear(subpool);
1264          SVN_ERR(svn_cmdline_fprintf
1265                  (stderr, subpool, "   '%s'\n",
1266                   APR_ARRAY_IDX(opt_state->prefixes, i, const char *)));
1267        }
1268
1269      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1270      svn_pool_destroy(subpool);
1271    }
1272
1273  SVN_ERR(parse_baton_initialize(&pb, opt_state, do_exclude, pool));
1274  SVN_ERR(svn_repos_parse_dumpstream3(pb->in_stream, &filtering_vtable, pb,
1275                                      TRUE, NULL, NULL, pool));
1276
1277  /* The rest of this is just reporting.  If we aren't reporting, get
1278     outta here. */
1279  if (opt_state->quiet)
1280    return SVN_NO_ERROR;
1281
1282  SVN_ERR(svn_cmdline_fputs("\n", stderr, pool));
1283
1284  if (pb->rev_drop_count)
1285    SVN_ERR(svn_cmdline_fprintf(stderr, pool,
1286                                Q_("Dropped %d revision.\n\n",
1287                                   "Dropped %d revisions.\n\n",
1288                                   pb->rev_drop_count),
1289                                pb->rev_drop_count));
1290
1291  if (pb->do_renumber_revs)
1292    {
1293      apr_pool_t *subpool = svn_pool_create(pool);
1294      SVN_ERR(svn_cmdline_fputs(_("Revisions renumbered as follows:\n"),
1295                                stderr, subpool));
1296
1297      /* Get the keys of the hash, sort them, then print the hash keys
1298         and values, sorted by keys. */
1299      num_keys = apr_hash_count(pb->renumber_history);
1300      keys = apr_array_make(pool, num_keys + 1, sizeof(svn_revnum_t));
1301      for (hi = apr_hash_first(pool, pb->renumber_history);
1302           hi;
1303           hi = apr_hash_next(hi))
1304        {
1305          const svn_revnum_t *revnum = svn__apr_hash_index_key(hi);
1306
1307          APR_ARRAY_PUSH(keys, svn_revnum_t) = *revnum;
1308        }
1309      qsort(keys->elts, keys->nelts,
1310            keys->elt_size, svn_sort_compare_revisions);
1311      for (i = 0; i < keys->nelts; i++)
1312        {
1313          svn_revnum_t this_key;
1314          struct revmap_t *this_val;
1315
1316          svn_pool_clear(subpool);
1317          this_key = APR_ARRAY_IDX(keys, i, svn_revnum_t);
1318          this_val = apr_hash_get(pb->renumber_history, &this_key,
1319                                  sizeof(this_key));
1320          if (this_val->was_dropped)
1321            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1322                                        _("   %ld => (dropped)\n"),
1323                                        this_key));
1324          else
1325            SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1326                                        "   %ld => %ld\n",
1327                                        this_key, this_val->rev));
1328        }
1329      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1330      svn_pool_destroy(subpool);
1331    }
1332
1333  if ((num_keys = apr_hash_count(pb->dropped_nodes)))
1334    {
1335      apr_pool_t *subpool = svn_pool_create(pool);
1336      SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1337                                  Q_("Dropped %d node:\n",
1338                                     "Dropped %d nodes:\n",
1339                                     num_keys),
1340                                  num_keys));
1341
1342      /* Get the keys of the hash, sort them, then print the hash keys
1343         and values, sorted by keys. */
1344      keys = apr_array_make(pool, num_keys + 1, sizeof(const char *));
1345      for (hi = apr_hash_first(pool, pb->dropped_nodes);
1346           hi;
1347           hi = apr_hash_next(hi))
1348        {
1349          const char *path = svn__apr_hash_index_key(hi);
1350
1351          APR_ARRAY_PUSH(keys, const char *) = path;
1352        }
1353      qsort(keys->elts, keys->nelts, keys->elt_size, svn_sort_compare_paths);
1354      for (i = 0; i < keys->nelts; i++)
1355        {
1356          svn_pool_clear(subpool);
1357          SVN_ERR(svn_cmdline_fprintf
1358                  (stderr, subpool, "   '%s'\n",
1359                   (const char *)APR_ARRAY_IDX(keys, i, const char *)));
1360        }
1361      SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1362      svn_pool_destroy(subpool);
1363    }
1364
1365  return SVN_NO_ERROR;
1366}
1367
1368/* This implements `exclude' subcommand. */
1369static svn_error_t *
1370subcommand_exclude(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1371{
1372  return do_filter(os, baton, TRUE, pool);
1373}
1374
1375
1376/* This implements `include` subcommand. */
1377static svn_error_t *
1378subcommand_include(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1379{
1380  return do_filter(os, baton, FALSE, pool);
1381}
1382
1383
1384
1385/** Main. **/
1386
1387int
1388main(int argc, const char *argv[])
1389{
1390  svn_error_t *err;
1391  apr_status_t apr_err;
1392  apr_pool_t *pool;
1393
1394  const svn_opt_subcommand_desc2_t *subcommand = NULL;
1395  struct svndumpfilter_opt_state opt_state;
1396  apr_getopt_t *os;
1397  int opt_id;
1398  apr_array_header_t *received_opts;
1399  int i;
1400
1401
1402  /* Initialize the app. */
1403  if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
1404    return EXIT_FAILURE;
1405
1406  /* Create our top-level pool.  Use a separate mutexless allocator,
1407   * given this application is single threaded.
1408   */
1409  pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
1410
1411  /* Check library versions */
1412  err = check_lib_versions();
1413  if (err)
1414    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1415
1416  received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));
1417
1418  /* Initialize the FS library. */
1419  err = svn_fs_initialize(pool);
1420  if (err)
1421    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1422
1423  if (argc <= 1)
1424    {
1425      SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1426      svn_pool_destroy(pool);
1427      return EXIT_FAILURE;
1428    }
1429
1430  /* Initialize opt_state. */
1431  memset(&opt_state, 0, sizeof(opt_state));
1432  opt_state.start_revision.kind = svn_opt_revision_unspecified;
1433  opt_state.end_revision.kind = svn_opt_revision_unspecified;
1434
1435  /* Parse options. */
1436  err = svn_cmdline__getopt_init(&os, argc, argv, pool);
1437  if (err)
1438    return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1439
1440  os->interleave = 1;
1441  while (1)
1442    {
1443      const char *opt_arg;
1444
1445      /* Parse the next option. */
1446      apr_err = apr_getopt_long(os, options_table, &opt_id, &opt_arg);
1447      if (APR_STATUS_IS_EOF(apr_err))
1448        break;
1449      else if (apr_err)
1450        {
1451          SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1452          svn_pool_destroy(pool);
1453          return EXIT_FAILURE;
1454        }
1455
1456      /* Stash the option code in an array before parsing it. */
1457      APR_ARRAY_PUSH(received_opts, int) = opt_id;
1458
1459      switch (opt_id)
1460        {
1461        case 'h':
1462        case '?':
1463          opt_state.help = TRUE;
1464          break;
1465        case svndumpfilter__version:
1466          opt_state.version = TRUE;
1467          break;
1468        case svndumpfilter__quiet:
1469          opt_state.quiet = TRUE;
1470          break;
1471        case svndumpfilter__glob:
1472          opt_state.glob = TRUE;
1473          break;
1474        case svndumpfilter__drop_empty_revs:
1475          opt_state.drop_empty_revs = TRUE;
1476          break;
1477        case svndumpfilter__drop_all_empty_revs:
1478          opt_state.drop_all_empty_revs = TRUE;
1479          break;
1480        case svndumpfilter__renumber_revs:
1481          opt_state.renumber_revs = TRUE;
1482          break;
1483        case svndumpfilter__preserve_revprops:
1484          opt_state.preserve_revprops = TRUE;
1485          break;
1486        case svndumpfilter__skip_missing_merge_sources:
1487          opt_state.skip_missing_merge_sources = TRUE;
1488          break;
1489        case svndumpfilter__targets:
1490          opt_state.targets_file = opt_arg;
1491          break;
1492        default:
1493          {
1494            SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1495            svn_pool_destroy(pool);
1496            return EXIT_FAILURE;
1497          }
1498        }  /* close `switch' */
1499    }  /* close `while' */
1500
1501  /* Disallow simultaneous use of both --drop-empty-revs and
1502     --drop-all-empty-revs. */
1503  if (opt_state.drop_empty_revs && opt_state.drop_all_empty_revs)
1504    {
1505      err = svn_error_create(SVN_ERR_CL_MUTUALLY_EXCLUSIVE_ARGS, NULL,
1506                             _("--drop-empty-revs cannot be used with "
1507                               "--drop-all-empty-revs"));
1508      return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1509    }
1510
1511  /* If the user asked for help, then the rest of the arguments are
1512     the names of subcommands to get help on (if any), or else they're
1513     just typos/mistakes.  Whatever the case, the subcommand to
1514     actually run is subcommand_help(). */
1515  if (opt_state.help)
1516    subcommand = svn_opt_get_canonical_subcommand2(cmd_table, "help");
1517
1518  /* If we're not running the `help' subcommand, then look for a
1519     subcommand in the first argument. */
1520  if (subcommand == NULL)
1521    {
1522      if (os->ind >= os->argc)
1523        {
1524          if (opt_state.version)
1525            {
1526              /* Use the "help" subcommand to handle the "--version" option. */
1527              static const svn_opt_subcommand_desc2_t pseudo_cmd =
1528                { "--version", subcommand_help, {0}, "",
1529                  {svndumpfilter__version,  /* must accept its own option */
1530                   svndumpfilter__quiet,
1531                  } };
1532
1533              subcommand = &pseudo_cmd;
1534            }
1535          else
1536            {
1537              svn_error_clear(svn_cmdline_fprintf
1538                              (stderr, pool,
1539                               _("Subcommand argument required\n")));
1540              SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1541              svn_pool_destroy(pool);
1542              return EXIT_FAILURE;
1543            }
1544        }
1545      else
1546        {
1547          const char *first_arg = os->argv[os->ind++];
1548          subcommand = svn_opt_get_canonical_subcommand2(cmd_table, first_arg);
1549          if (subcommand == NULL)
1550            {
1551              const char* first_arg_utf8;
1552              if ((err = svn_utf_cstring_to_utf8(&first_arg_utf8, first_arg,
1553                                                 pool)))
1554                return svn_cmdline_handle_exit_error(err, pool,
1555                                                     "svndumpfilter: ");
1556
1557              svn_error_clear(
1558                svn_cmdline_fprintf(stderr, pool,
1559                                    _("Unknown subcommand: '%s'\n"),
1560                                    first_arg_utf8));
1561              SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1562              svn_pool_destroy(pool);
1563              return EXIT_FAILURE;
1564            }
1565        }
1566    }
1567
1568  /* If there's a second argument, it's probably [one of] prefixes.
1569     Every subcommand except `help' requires at least one, so we parse
1570     them out here and store in opt_state. */
1571
1572  if (subcommand->cmd_func != subcommand_help)
1573    {
1574
1575      opt_state.prefixes = apr_array_make(pool, os->argc - os->ind,
1576                                          sizeof(const char *));
1577      for (i = os->ind ; i< os->argc; i++)
1578        {
1579          const char *prefix;
1580
1581          /* Ensure that each prefix is UTF8-encoded, in internal
1582             style, and absolute. */
1583          SVN_INT_ERR(svn_utf_cstring_to_utf8(&prefix, os->argv[i], pool));
1584          prefix = svn_relpath__internal_style(prefix, pool);
1585          if (prefix[0] != '/')
1586            prefix = apr_pstrcat(pool, "/", prefix, (char *)NULL);
1587          APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1588        }
1589
1590      if (opt_state.targets_file)
1591        {
1592          svn_stringbuf_t *buffer, *buffer_utf8;
1593          const char *utf8_targets_file;
1594          apr_array_header_t *targets = apr_array_make(pool, 0,
1595                                                       sizeof(const char *));
1596
1597          /* We need to convert to UTF-8 now, even before we divide
1598             the targets into an array, because otherwise we wouldn't
1599             know what delimiter to use for svn_cstring_split().  */
1600
1601          SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_targets_file,
1602                                              opt_state.targets_file, pool));
1603
1604          SVN_INT_ERR(svn_stringbuf_from_file2(&buffer, utf8_targets_file,
1605                                               pool));
1606          SVN_INT_ERR(svn_utf_stringbuf_to_utf8(&buffer_utf8, buffer, pool));
1607
1608          targets = apr_array_append(pool,
1609                         svn_cstring_split(buffer_utf8->data, "\n\r",
1610                                           TRUE, pool),
1611                         targets);
1612
1613          for (i = 0; i < targets->nelts; i++)
1614            {
1615              const char *prefix = APR_ARRAY_IDX(targets, i, const char *);
1616              if (prefix[0] != '/')
1617                prefix = apr_pstrcat(pool, "/", prefix, (char *)NULL);
1618              APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
1619            }
1620        }
1621
1622      if (apr_is_empty_array(opt_state.prefixes))
1623        {
1624          svn_error_clear(svn_cmdline_fprintf
1625                          (stderr, pool,
1626                           _("\nError: no prefixes supplied.\n")));
1627          svn_pool_destroy(pool);
1628          return EXIT_FAILURE;
1629        }
1630    }
1631
1632
1633  /* Check that the subcommand wasn't passed any inappropriate options. */
1634  for (i = 0; i < received_opts->nelts; i++)
1635    {
1636      opt_id = APR_ARRAY_IDX(received_opts, i, int);
1637
1638      /* All commands implicitly accept --help, so just skip over this
1639         when we see it. Note that we don't want to include this option
1640         in their "accepted options" list because it would be awfully
1641         redundant to display it in every commands' help text. */
1642      if (opt_id == 'h' || opt_id == '?')
1643        continue;
1644
1645      if (! svn_opt_subcommand_takes_option3(subcommand, opt_id, NULL))
1646        {
1647          const char *optstr;
1648          const apr_getopt_option_t *badopt =
1649            svn_opt_get_option_from_code2(opt_id, options_table, subcommand,
1650                                          pool);
1651          svn_opt_format_option(&optstr, badopt, FALSE, pool);
1652          if (subcommand->name[0] == '-')
1653            SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
1654          else
1655            svn_error_clear(svn_cmdline_fprintf
1656                            (stderr, pool,
1657                             _("Subcommand '%s' doesn't accept option '%s'\n"
1658                               "Type 'svndumpfilter help %s' for usage.\n"),
1659                             subcommand->name, optstr, subcommand->name));
1660          svn_pool_destroy(pool);
1661          return EXIT_FAILURE;
1662        }
1663    }
1664
1665  /* Run the subcommand. */
1666  err = (*subcommand->cmd_func)(os, &opt_state, pool);
1667  if (err)
1668    {
1669      /* For argument-related problems, suggest using the 'help'
1670         subcommand. */
1671      if (err->apr_err == SVN_ERR_CL_INSUFFICIENT_ARGS
1672          || err->apr_err == SVN_ERR_CL_ARG_PARSING_ERROR)
1673        {
1674          err = svn_error_quick_wrap(err,
1675                                     _("Try 'svndumpfilter help' for more "
1676                                       "info"));
1677        }
1678      return svn_cmdline_handle_exit_error(err, pool, "svndumpfilter: ");
1679    }
1680  else
1681    {
1682      svn_pool_destroy(pool);
1683
1684      /* Flush stdout, making sure the user will see any print errors. */
1685      SVN_INT_ERR(svn_cmdline_fflush(stdout));
1686      return EXIT_SUCCESS;
1687    }
1688}
1689